From e12e45d2aab327698bba3d4111d3598f5371d57c Mon Sep 17 00:00:00 2001 From: Isaac Parenteau Date: Wed, 11 Nov 2020 23:15:23 -0600 Subject: [PATCH] Added logic to check the s3 bucket to make sure the file exists and delete orphan files --- pom.xml | 74 ++++++++++++++++ .../java/net/locusworks/s3sync/Entry.java | 3 + .../locusworks/s3sync/client/FileManager.java | 29 +++++-- .../locusworks/s3sync/client/S3Client.java | 85 ++++++------------- 4 files changed, 123 insertions(+), 68 deletions(-) diff --git a/pom.xml b/pom.xml index 9e3145e..71c374c 100644 --- a/pom.xml +++ b/pom.xml @@ -12,8 +12,82 @@ https://nexus.locusworks.net 1.8 1.8 + 6.0.2 + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.0.0-M2 + + + + + + + + maven-compiler-plugin + 3.1 + + 1.8 + 1.8 + + + + org.owasp + dependency-check-maven + ${dep.check.version} + + true + true + false + true + + + + + check + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + + package + + shade + + + S3Sync-${project.version} + + + net.locusworks.s3sync.Entry + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + com.amazonaws diff --git a/src/main/java/net/locusworks/s3sync/Entry.java b/src/main/java/net/locusworks/s3sync/Entry.java index bcae437..99acab5 100644 --- a/src/main/java/net/locusworks/s3sync/Entry.java +++ b/src/main/java/net/locusworks/s3sync/Entry.java @@ -4,6 +4,7 @@ import net.locusworks.logger.ApplicationLogger; import net.locusworks.logger.ApplicationLoggerFactory; import net.locusworks.logger.ApplicationLoggerInitializer; import net.locusworks.logger.LogLevel; +import net.locusworks.s3sync.client.FileManager; import net.locusworks.s3sync.client.S3Client; import net.locusworks.s3sync.conf.ConfigurationManager; @@ -36,7 +37,9 @@ public class Entry { logger.info("Starting S3 Sync"); try (S3Client client = new S3Client(ConfigurationManager.getInstance())) { + FileManager manager = FileManager.newInstance(client); client.syncFolder(); + manager.removeOrphanedFiles(); } catch (Exception | Error e) { logger.error(e); System.exit(-1); diff --git a/src/main/java/net/locusworks/s3sync/client/FileManager.java b/src/main/java/net/locusworks/s3sync/client/FileManager.java index d37a490..18b027b 100644 --- a/src/main/java/net/locusworks/s3sync/client/FileManager.java +++ b/src/main/java/net/locusworks/s3sync/client/FileManager.java @@ -6,8 +6,10 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Set; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import com.amazonaws.services.s3.model.S3Object; @@ -21,6 +23,8 @@ public class FileManager implements AutoCloseable { public static final String FILE_CSV = "upload.csv"; private Map detailMap; + + private Set s3Files; private S3Client client; @@ -30,19 +34,20 @@ public class FileManager implements AutoCloseable { private FileManager(S3Client client) throws IOException { detailMap = new LinkedHashMap(); + s3Files = new HashSet(); this.client = client; this.bucket = client.getBucket(); readFile(); } private void readFile() throws IOException { - + S3Object hashFile = client.getObject(bucket, FILE_CSV); if (hashFile == null) return; Path file = Paths.get(FILE_CSV); client.downloadFile(FILE_CSV, file); - + s3Files = client.getFileList(); if (Files.notExists(file)) return; try(BufferedReader reader = Files.newBufferedReader(file)) { @@ -77,25 +82,31 @@ public class FileManager implements AutoCloseable { return detailMap.put(fd.getFile(), fd) != null; } - public FileDetail getFileDetail(Path path) throws IOException { + public FileDetail getFileDetail(Path path, String key) throws IOException { boolean newFile = false; - String file = path.toString(); FileDetail fd = null; - if (detailMap.containsKey(file)) { - fd = detailMap.get(file); + if (detailMap.containsKey(key)) { + fd = detailMap.get(key); } else { newFile = true; - fd = new FileDetail(file, DigestUtils.sha1Hex(Files.newInputStream(path)), false); + fd = new FileDetail(key, DigestUtils.sha1Hex(Files.newInputStream(path)), false); } String sha1 = newFile ? fd.getHash() : DigestUtils.sha1Hex(Files.newInputStream(path)); - - if (!sha1.equals(fd.getHash())) { + + if (sha1.equals(fd.getHash()) && s3Files.contains(key)) { + fd.setUploaded(true); + s3Files.remove(key); + } else { fd.setUploaded(false); } return fd; } + + public void removeOrphanedFiles() { + client.removeFiles(s3Files); + } @Override public void close() throws Exception { diff --git a/src/main/java/net/locusworks/s3sync/client/S3Client.java b/src/main/java/net/locusworks/s3sync/client/S3Client.java index 1763271..e2c01f1 100644 --- a/src/main/java/net/locusworks/s3sync/client/S3Client.java +++ b/src/main/java/net/locusworks/s3sync/client/S3Client.java @@ -3,16 +3,16 @@ package net.locusworks.s3sync.client; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashSet; +import java.util.Set; import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkClientException; import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.Bucket; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.Permission; import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; import com.amazonaws.services.s3.transfer.Download; import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.TransferManagerBuilder; @@ -59,10 +59,11 @@ public class S3Client implements AutoCloseable { xferMgr = !xferMgrNull ? xferMgr : TransferManagerBuilder.standard().withS3Client(s3Client).build(); FileDetail fd = null; try { - fd = FileManager.getInstance().getFileDetail(file); + String key = getPath(file); + fd = FileManager.getInstance().getFileDetail(file, key); if (fd.isUploaded()) return; logger.info("Uploading file: %s", file); - Upload xfer = xferMgr.upload(bucket, getPath(file), file.toFile()); + Upload xfer = xferMgr.upload(bucket, key, file.toFile()); xfer.waitForCompletion(); fd.setUploaded(true); FileManager.getInstance().addEntry(fd); @@ -109,7 +110,7 @@ public class S3Client implements AutoCloseable { public void syncFolder() throws IOException { TransferManager xferMgr = TransferManagerBuilder.standard().withS3Client(s3Client).build(); - try (FileManager manager = FileManager.newInstance(this)) { + try (FileManager manager = FileManager.getInstance()) { Files.walk(syncFolder) .filter(f -> Files.isRegularFile(f)) .forEach(f -> uploadFile(xferMgr, syncFolder.resolve(f))); @@ -120,65 +121,31 @@ public class S3Client implements AutoCloseable { xferMgr.shutdownNow(false); } - /** - *

- * Gets the object stored in Amazon S3 under the specified bucket and key. - *

- *

- * Be extremely careful when using this method; the returned Amazon S3 - * object contains a direct stream of data from the HTTP connection. The - * underlying HTTP connection cannot be reused until the user finishes - * reading the data and closes the stream. Also note that if not all data - * is read from the stream then the SDK will abort the underlying connection, - * this may have a negative impact on performance. Therefore: - *

- *
    - *
  • Use the data from the input stream in Amazon S3 object as soon as possible
  • - *
  • Read all data from the stream (use {@link GetObjectRequest#setRange(long, long)} to request only the bytes you need)
  • - *
  • Close the input stream in Amazon S3 object as soon as possible
  • - *
- * If these rules are not followed, the client can run out of resources by - * allocating too many open, but unused, HTTP connections.

- *

- * To get an object from Amazon S3, the caller must have - * {@link Permission#Read} access to the object. - *

- *

- * If the object fetched is publicly readable, it can also read it by - * pasting its URL into a browser. - *

- *

- * For more advanced options (such as downloading only a range of an - * object's content, or placing constraints on when the object should be - * downloaded) callers can use {@link #getObject(GetObjectRequest)}. - *

- *

- * If you are accessing AWS - * KMS-encrypted objects, you need to specify the correct region of the - * bucket on your client and configure AWS Signature Version 4 for added - * security. For more information on how to do this, see - * http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html# - * specify-signature-version - *

- * - * @param bucketName - * The name of the bucket containing the desired object. - * @param key - * The key under which the desired object is stored. - * - * @return The object stored in Amazon S3 in the specified bucket and key. - * - * @throws SdkClientException - * If any errors are encountered in the client while making the - * request or handling the response. - * - */ public S3Object getObject(String bucketName, String key) { try { return s3Client.getObject(bucketName, key); } catch (AmazonServiceException ex) {} return null; } + + public Set getFileList() { + Set fileList = new HashSet(); + for(S3ObjectSummary os: s3Client.listObjectsV2(getBucket()).getObjectSummaries()) { + fileList.add(os.getKey()); + } + return fileList; + } + + public void removeFiles(Set s3Files) { + for(String key : s3Files) { + try { + logger.info("Removing file: " + key); + s3Client.deleteObject(getBucket(), key); + } catch (AmazonServiceException ex) { + logger.warn(String.format("Unable to delete %s: %s", key, ex.getMessage()), ex); + } + } + } private String getPath(Path file) { if (file.getParent() == null) return file.getFileName().toString();