Added logic to check the S3 bucket to make sure each file exists, and to delete orphaned files
All checks were successful (Locusworks Team/aws-s3-sync/pipeline/head: this commit looks good)

commit e12e45d2aa (parent 70f54ea2ee)
Date: 2020-11-11 23:15:23 -06:00
4 changed files with 123 additions and 68 deletions
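In outline, the new logic works like this: the sync first downloads the bucket's full key listing into a set, strikes a key off the set whenever the matching local file is verified, and finally deletes whatever keys remain. A minimal sketch of that set-difference idea (illustrative names, not the project's code):

import java.util.HashSet;
import java.util.Set;

// Sketch of the orphan-removal idea: start from every key in the bucket,
// drop the keys that still have a local counterpart, and whatever is left
// is an orphan that can safely be deleted.
public class OrphanSketch {
    public static Set<String> findOrphans(Set<String> bucketKeys, Set<String> localKeys) {
        Set<String> orphans = new HashSet<>(bucketKeys); // copy, so the listing stays intact
        orphans.removeAll(localKeys);                    // keep only keys with no local file
        return orphans;
    }
}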

pom.xml

@@ -12,8 +12,82 @@
     <nexus.repo>https://nexus.locusworks.net</nexus.repo>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
+    <dep.check.version>6.0.2</dep.check.version>
   </properties>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <version>3.0.0-M2</version>
+        <configuration>
+          <rules>
+            <dependencyConvergence />
+          </rules>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.1</version>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.owasp</groupId>
+        <artifactId>dependency-check-maven</artifactId>
+        <version>${dep.check.version}</version>
+        <configuration>
+          <skipProvidedScope>true</skipProvidedScope>
+          <skipTestScope>true</skipTestScope>
+          <failOnError>false</failOnError>
+          <versionCheckEnabled>true</versionCheckEnabled>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.2.1</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <finalName>S3Sync-${project.version}</finalName>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <mainClass>net.locusworks.s3sync.Entry</mainClass>
+                </transformer>
+              </transformers>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
   <dependencies>
     <dependency>
       <groupId>com.amazonaws</groupId>
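The build changes are incidental to the sync logic: the enforcer plugin's dependencyConvergence rule fails the build when transitive dependency versions conflict, the OWASP dependency-check check goal should run during mvn verify, and the shade plugin builds a runnable fat jar at package time with net.locusworks.s3sync.Entry as the manifest main class. The META-INF/*.SF, *.DSA, and *.RSA excludes strip signature files from signed dependencies, which would otherwise trigger "Invalid signature file digest" errors when the shaded jar is run.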

Entry.java

@@ -4,6 +4,7 @@ import net.locusworks.logger.ApplicationLogger;
 import net.locusworks.logger.ApplicationLoggerFactory;
 import net.locusworks.logger.ApplicationLoggerInitializer;
 import net.locusworks.logger.LogLevel;
+import net.locusworks.s3sync.client.FileManager;
 import net.locusworks.s3sync.client.S3Client;
 import net.locusworks.s3sync.conf.ConfigurationManager;
@@ -36,7 +37,9 @@ public class Entry {
     logger.info("Starting S3 Sync");
     try (S3Client client = new S3Client(ConfigurationManager.getInstance())) {
+      FileManager manager = FileManager.newInstance(client);
       client.syncFolder();
+      manager.removeOrphanedFiles();
     } catch (Exception | Error e) {
       logger.error(e);
       System.exit(-1);
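Note the ordering here: FileManager.newInstance(client) builds the manager (and with it the bucket's key listing) before the walk starts, syncFolder() reuses that instance and strikes each verified key off the listing, and removeOrphanedFiles() runs only after the whole walk is done, so a key is declared an orphan only once every local file has had a chance to claim it.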

FileManager.java

@@ -6,8 +6,10 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Set;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.StringUtils;
 import com.amazonaws.services.s3.model.S3Object;
@@ -21,6 +23,8 @@ public class FileManager implements AutoCloseable {
   public static final String FILE_CSV = "upload.csv";
   private Map<String, FileDetail> detailMap;
+  private Set<String> s3Files;
   private S3Client client;
@@ -30,19 +34,20 @@ public class FileManager implements AutoCloseable {
   private FileManager(S3Client client) throws IOException {
     detailMap = new LinkedHashMap<String, FileDetail>();
+    s3Files = new HashSet<String>();
     this.client = client;
     this.bucket = client.getBucket();
     readFile();
   }
   private void readFile() throws IOException {
-    S3Object hashFile = client.getObject(bucket, FILE_CSV);
-    if (hashFile == null) return;
     Path file = Paths.get(FILE_CSV);
     client.downloadFile(FILE_CSV, file);
+    s3Files = client.getFileList();
+    if (Files.notExists(file)) return;
     try(BufferedReader reader = Files.newBufferedReader(file)) {
@@ -77,25 +82,31 @@ public class FileManager implements AutoCloseable {
     return detailMap.put(fd.getFile(), fd) != null;
   }
-  public FileDetail getFileDetail(Path path) throws IOException {
+  public FileDetail getFileDetail(Path path, String key) throws IOException {
     boolean newFile = false;
     String file = path.toString();
     FileDetail fd = null;
-    if (detailMap.containsKey(file)) {
-      fd = detailMap.get(file);
+    if (detailMap.containsKey(key)) {
+      fd = detailMap.get(key);
     } else {
       newFile = true;
-      fd = new FileDetail(file, DigestUtils.sha1Hex(Files.newInputStream(path)), false);
+      fd = new FileDetail(key, DigestUtils.sha1Hex(Files.newInputStream(path)), false);
     }
     String sha1 = newFile ? fd.getHash() : DigestUtils.sha1Hex(Files.newInputStream(path));
-    if (!sha1.equals(fd.getHash())) {
+    if (sha1.equals(fd.getHash()) && s3Files.contains(key)) {
+      fd.setUploaded(true);
+      s3Files.remove(key);
+    } else {
       fd.setUploaded(false);
     }
     return fd;
   }
+  public void removeOrphanedFiles() {
+    client.removeFiles(s3Files);
+  }
   @Override
   public void close() throws Exception {
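The inverted condition above is the heart of the change: a file is skipped only when its current hash matches the recorded one and its key is still present in the bucket listing, and a skipped key is struck from s3Files, so the set ends up holding exactly the keys with no local counterpart. A compact restatement of that decision (sketch only, illustrative names):

import java.util.Set;

// Simplified restatement of the rewritten getFileDetail decision.
public class UploadDecisionSketch {
    public static boolean needsUpload(String localSha1, String recordedSha1,
                                      String key, Set<String> s3Keys) {
        if (localSha1.equals(recordedSha1) && s3Keys.contains(key)) {
            s3Keys.remove(key); // accounted for; will not be reported as an orphan
            return false;       // unchanged and present in the bucket: skip
        }
        return true;            // new, modified, or missing from the bucket: upload
    }
}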

S3Client.java

@@ -3,16 +3,16 @@ package net.locusworks.s3sync.client;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.HashSet;
+import java.util.Set;
 import com.amazonaws.AmazonClientException;
 import com.amazonaws.AmazonServiceException;
 import com.amazonaws.SdkClientException;
 import com.amazonaws.regions.Regions;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.model.Bucket;
-import com.amazonaws.services.s3.model.GetObjectRequest;
-import com.amazonaws.services.s3.model.Permission;
-import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
 import com.amazonaws.services.s3.transfer.Download;
 import com.amazonaws.services.s3.transfer.TransferManager;
 import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
@@ -59,10 +59,11 @@ public class S3Client implements AutoCloseable {
     xferMgr = !xferMgrNull ? xferMgr : TransferManagerBuilder.standard().withS3Client(s3Client).build();
     FileDetail fd = null;
     try {
-      fd = FileManager.getInstance().getFileDetail(file);
+      String key = getPath(file);
+      fd = FileManager.getInstance().getFileDetail(file, key);
       if (fd.isUploaded()) return;
       logger.info("Uploading file: %s", file);
-      Upload xfer = xferMgr.upload(bucket, getPath(file), file.toFile());
+      Upload xfer = xferMgr.upload(bucket, key, file.toFile());
       xfer.waitForCompletion();
       fd.setUploaded(true);
       FileManager.getInstance().addEntry(fd);
@@ -109,7 +110,7 @@ public class S3Client implements AutoCloseable {
   public void syncFolder() throws IOException {
     TransferManager xferMgr = TransferManagerBuilder.standard().withS3Client(s3Client).build();
-    try (FileManager manager = FileManager.newInstance(this)) {
+    try (FileManager manager = FileManager.getInstance()) {
       Files.walk(syncFolder)
         .filter(f -> Files.isRegularFile(f))
         .forEach(f -> uploadFile(xferMgr, syncFolder.resolve(f)));
@@ -120,65 +121,31 @@
     xferMgr.shutdownNow(false);
   }
-  /**
-   * <p>
-   * Gets the object stored in Amazon S3 under the specified bucket and key.
-   * </p>
-   * <p>
-   * Be extremely careful when using this method; the returned Amazon S3
-   * object contains a direct stream of data from the HTTP connection. The
-   * underlying HTTP connection cannot be reused until the user finishes
-   * reading the data and closes the stream. Also note that if not all data
-   * is read from the stream then the SDK will abort the underlying connection,
-   * this may have a negative impact on performance. Therefore:
-   * </p>
-   * <ul>
-   * <li>Use the data from the input stream in Amazon S3 object as soon as possible</li>
-   * <li>Read all data from the stream (use {@link GetObjectRequest#setRange(long, long)} to request only the bytes you need)</li>
-   * <li>Close the input stream in Amazon S3 object as soon as possible</li>
-   * </ul>
-   * If these rules are not followed, the client can run out of resources by
-   * allocating too many open, but unused, HTTP connections. </p>
-   * <p>
-   * To get an object from Amazon S3, the caller must have
-   * {@link Permission#Read} access to the object.
-   * </p>
-   * <p>
-   * If the object fetched is publicly readable, it can also read it by
-   * pasting its URL into a browser.
-   * </p>
-   * <p>
-   * For more advanced options (such as downloading only a range of an
-   * object's content, or placing constraints on when the object should be
-   * downloaded) callers can use {@link #getObject(GetObjectRequest)}.
-   * </p>
-   * <p>
-   * If you are accessing <a href="http://aws.amazon.com/kms/">AWS
-   * KMS</a>-encrypted objects, you need to specify the correct region of the
-   * bucket on your client and configure AWS Signature Version 4 for added
-   * security. For more information on how to do this, see
-   * http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#
-   * specify-signature-version
-   * </p>
-   *
-   * @param bucketName
-   *          The name of the bucket containing the desired object.
-   * @param key
-   *          The key under which the desired object is stored.
-   *
-   * @return The object stored in Amazon S3 in the specified bucket and key.
-   *
-   * @throws SdkClientException
-   *           If any errors are encountered in the client while making the
-   *           request or handling the response.
-   *
-   */
-  public S3Object getObject(String bucketName, String key) {
-    try {
-      return s3Client.getObject(bucketName, key);
-    } catch (AmazonServiceException ex) {}
-    return null;
-  }
+  public Set<String> getFileList() {
+    Set<String> fileList = new HashSet<String>();
+    for(S3ObjectSummary os: s3Client.listObjectsV2(getBucket()).getObjectSummaries()) {
+      fileList.add(os.getKey());
+    }
+    return fileList;
+  }
+  public void removeFiles(Set<String> s3Files) {
+    for(String key : s3Files) {
+      try {
+        logger.info("Removing file: " + key);
+        s3Client.deleteObject(getBucket(), key);
+      } catch (AmazonServiceException ex) {
+        logger.warn(String.format("Unable to delete %s: %s", key, ex.getMessage()), ex);
+      }
+    }
+  }
   private String getPath(Path file) {
     if (file.getParent() == null) return file.getFileName().toString();
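One caveat on the new getFileList(): the single-argument listObjectsV2 call returns at most 1,000 keys per response, so on a larger bucket later pages would never land in s3Files; those files would be needlessly re-uploaded and their orphans never detected. A hedged sketch of a paginated variant using the v1 SDK's continuation token:

import java.util.HashSet;
import java.util.Set;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;

// Paginated bucket listing (sketch): loops on the continuation token until
// S3 reports the listing is no longer truncated.
public class ListAllKeys {
    public static Set<String> allKeys(AmazonS3 s3, String bucket) {
        Set<String> keys = new HashSet<>();
        ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket);
        ListObjectsV2Result result;
        do {
            result = s3.listObjectsV2(req);
            for (S3ObjectSummary os : result.getObjectSummaries()) {
                keys.add(os.getKey());
            }
            req.setContinuationToken(result.getNextContinuationToken());
        } while (result.isTruncated());
        return keys;
    }
}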