Added logic to check the S3 bucket to make sure each file exists, and to delete orphaned files
All checks were successful (Locusworks Team/aws-s3-sync/pipeline/head: this commit looks good)

commit e12e45d2aa (parent 70f54ea2ee)
Date: 2020-11-11 23:15:23 -06:00
4 changed files with 123 additions and 68 deletions
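In outline, the new logic works like this: the sync first downloads the bucket's full key listing into a set, strikes a key off the set whenever the matching local file is verified, and finally deletes whatever keys remain. A minimal sketch of that set-difference idea (illustrative names, not the project's code):

import java.util.HashSet;
import java.util.Set;

// Sketch of the orphan-removal idea: start from every key in the bucket,
// drop the keys that still have a local counterpart, and whatever is left
// is an orphan that can safely be deleted.
public class OrphanSketch {
    public static Set<String> findOrphans(Set<String> bucketKeys, Set<String> localKeys) {
        Set<String> orphans = new HashSet<>(bucketKeys); // copy, so the listing stays intact
        orphans.removeAll(localKeys);                    // keep only keys with no local file
        return orphans;
    }
}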

pom.xml

@@ -12,8 +12,82 @@
     <nexus.repo>https://nexus.locusworks.net</nexus.repo>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
+    <dep.check.version>6.0.2</dep.check.version>
   </properties>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <version>3.0.0-M2</version>
+        <configuration>
+          <rules>
+            <dependencyConvergence />
+          </rules>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.1</version>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.owasp</groupId>
+        <artifactId>dependency-check-maven</artifactId>
+        <version>${dep.check.version}</version>
+        <configuration>
+          <skipProvidedScope>true</skipProvidedScope>
+          <skipTestScope>true</skipTestScope>
+          <failOnError>false</failOnError>
+          <versionCheckEnabled>true</versionCheckEnabled>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.2.1</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <finalName>S3Sync-${project.version}</finalName>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <mainClass>net.locusworks.s3sync.Entry</mainClass>
+                </transformer>
+              </transformers>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
   <dependencies>
     <dependency>
       <groupId>com.amazonaws</groupId>
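The build changes are incidental to the sync logic: the enforcer plugin's dependencyConvergence rule fails the build when transitive dependency versions conflict, the OWASP dependency-check check goal should run during mvn verify, and the shade plugin builds a runnable fat jar at package time with net.locusworks.s3sync.Entry as the manifest main class. The META-INF/*.SF, *.DSA, and *.RSA excludes strip signature files from signed dependencies, which would otherwise trigger "Invalid signature file digest" errors when the shaded jar is run.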

Entry.java

@@ -4,6 +4,7 @@ import net.locusworks.logger.ApplicationLogger;
 import net.locusworks.logger.ApplicationLoggerFactory;
 import net.locusworks.logger.ApplicationLoggerInitializer;
 import net.locusworks.logger.LogLevel;
+import net.locusworks.s3sync.client.FileManager;
 import net.locusworks.s3sync.client.S3Client;
 import net.locusworks.s3sync.conf.ConfigurationManager;
@@ -36,7 +37,9 @@ public class Entry {
     logger.info("Starting S3 Sync");
     try (S3Client client = new S3Client(ConfigurationManager.getInstance())) {
+      FileManager manager = FileManager.newInstance(client);
       client.syncFolder();
+      manager.removeOrphanedFiles();
     } catch (Exception | Error e) {
       logger.error(e);
       System.exit(-1);
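Note the ordering here: FileManager.newInstance(client) builds the manager (and with it the bucket's key listing) before the walk starts, syncFolder() reuses that instance and strikes each verified key off the listing, and removeOrphanedFiles() runs only after the whole walk is done, so a key is declared an orphan only once every local file has had a chance to claim it.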

FileManager.java

@@ -6,8 +6,10 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Set;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.StringUtils;
 import com.amazonaws.services.s3.model.S3Object;
@@ -21,6 +23,8 @@ public class FileManager implements AutoCloseable {
   public static final String FILE_CSV = "upload.csv";
   private Map<String, FileDetail> detailMap;
+  private Set<String> s3Files;
   private S3Client client;
@@ -30,19 +34,20 @@ public class FileManager implements AutoCloseable {
   private FileManager(S3Client client) throws IOException {
     detailMap = new LinkedHashMap<String, FileDetail>();
+    s3Files = new HashSet<String>();
     this.client = client;
     this.bucket = client.getBucket();
     readFile();
   }
   private void readFile() throws IOException {
-    S3Object hashFile = client.getObject(bucket, FILE_CSV);
-    if (hashFile == null) return;
     Path file = Paths.get(FILE_CSV);
     client.downloadFile(FILE_CSV, file);
+    s3Files = client.getFileList();
+    if (Files.notExists(file)) return;
     try(BufferedReader reader = Files.newBufferedReader(file)) {
@@ -77,25 +82,31 @@ public class FileManager implements AutoCloseable {
     return detailMap.put(fd.getFile(), fd) != null;
   }
-  public FileDetail getFileDetail(Path path) throws IOException {
+  public FileDetail getFileDetail(Path path, String key) throws IOException {
     boolean newFile = false;
     String file = path.toString();
     FileDetail fd = null;
-    if (detailMap.containsKey(file)) {
-      fd = detailMap.get(file);
+    if (detailMap.containsKey(key)) {
+      fd = detailMap.get(key);
     } else {
       newFile = true;
-      fd = new FileDetail(file, DigestUtils.sha1Hex(Files.newInputStream(path)), false);
+      fd = new FileDetail(key, DigestUtils.sha1Hex(Files.newInputStream(path)), false);
     }
     String sha1 = newFile ? fd.getHash() : DigestUtils.sha1Hex(Files.newInputStream(path));
-    if (!sha1.equals(fd.getHash())) {
+    if (sha1.equals(fd.getHash()) && s3Files.contains(key)) {
+      fd.setUploaded(true);
+      s3Files.remove(key);
+    } else {
       fd.setUploaded(false);
     }
     return fd;
   }
+  public void removeOrphanedFiles() {
+    client.removeFiles(s3Files);
+  }
   @Override
   public void close() throws Exception {
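The inverted condition above is the heart of the change: a file is skipped only when its current hash matches the recorded one and its key is still present in the bucket listing, and a skipped key is struck from s3Files, so the set ends up holding exactly the keys with no local counterpart. A compact restatement of that decision (sketch only, illustrative names):

import java.util.Set;

// Simplified restatement of the rewritten getFileDetail decision.
public class UploadDecisionSketch {
    public static boolean needsUpload(String localSha1, String recordedSha1,
                                      String key, Set<String> s3Keys) {
        if (localSha1.equals(recordedSha1) && s3Keys.contains(key)) {
            s3Keys.remove(key); // accounted for; will not be reported as an orphan
            return false;       // unchanged and present in the bucket: skip
        }
        return true;            // new, modified, or missing from the bucket: upload
    }
}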

S3Client.java

@@ -3,16 +3,16 @@ package net.locusworks.s3sync.client;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.HashSet;
+import java.util.Set;
 import com.amazonaws.AmazonClientException;
 import com.amazonaws.AmazonServiceException;
 import com.amazonaws.SdkClientException;
 import com.amazonaws.regions.Regions;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.model.Bucket;
-import com.amazonaws.services.s3.model.GetObjectRequest;
-import com.amazonaws.services.s3.model.Permission;
-import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
 import com.amazonaws.services.s3.transfer.Download;
 import com.amazonaws.services.s3.transfer.TransferManager;
 import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
@@ -59,10 +59,11 @@ public class S3Client implements AutoCloseable {
     xferMgr = !xferMgrNull ? xferMgr : TransferManagerBuilder.standard().withS3Client(s3Client).build();
     FileDetail fd = null;
     try {
-      fd = FileManager.getInstance().getFileDetail(file);
+      String key = getPath(file);
+      fd = FileManager.getInstance().getFileDetail(file, key);
       if (fd.isUploaded()) return;
       logger.info("Uploading file: %s", file);
-      Upload xfer = xferMgr.upload(bucket, getPath(file), file.toFile());
+      Upload xfer = xferMgr.upload(bucket, key, file.toFile());
       xfer.waitForCompletion();
       fd.setUploaded(true);
       FileManager.getInstance().addEntry(fd);
@@ -109,7 +110,7 @@ public class S3Client implements AutoCloseable {
   public void syncFolder() throws IOException {
     TransferManager xferMgr = TransferManagerBuilder.standard().withS3Client(s3Client).build();
-    try (FileManager manager = FileManager.newInstance(this)) {
+    try (FileManager manager = FileManager.getInstance()) {
       Files.walk(syncFolder)
         .filter(f -> Files.isRegularFile(f))
         .forEach(f -> uploadFile(xferMgr, syncFolder.resolve(f)));
@@ -120,65 +121,31 @@
     xferMgr.shutdownNow(false);
   }
-  /**
-   * <p>
-   * Gets the object stored in Amazon S3 under the specified bucket and key.
-   * </p>
-   * <p>
-   * Be extremely careful when using this method; the returned Amazon S3
-   * object contains a direct stream of data from the HTTP connection. The
-   * underlying HTTP connection cannot be reused until the user finishes
-   * reading the data and closes the stream. Also note that if not all data
-   * is read from the stream then the SDK will abort the underlying connection,
-   * this may have a negative impact on performance. Therefore:
-   * </p>
-   * <ul>
-   * <li>Use the data from the input stream in Amazon S3 object as soon as possible</li>
-   * <li>Read all data from the stream (use {@link GetObjectRequest#setRange(long, long)} to request only the bytes you need)</li>
-   * <li>Close the input stream in Amazon S3 object as soon as possible</li>
-   * </ul>
-   * If these rules are not followed, the client can run out of resources by
-   * allocating too many open, but unused, HTTP connections. </p>
-   * <p>
-   * To get an object from Amazon S3, the caller must have
-   * {@link Permission#Read} access to the object.
-   * </p>
-   * <p>
-   * If the object fetched is publicly readable, it can also read it by
-   * pasting its URL into a browser.
-   * </p>
-   * <p>
-   * For more advanced options (such as downloading only a range of an
-   * object's content, or placing constraints on when the object should be
-   * downloaded) callers can use {@link #getObject(GetObjectRequest)}.
-   * </p>
-   * <p>
-   * If you are accessing <a href="http://aws.amazon.com/kms/">AWS
-   * KMS</a>-encrypted objects, you need to specify the correct region of the
-   * bucket on your client and configure AWS Signature Version 4 for added
-   * security. For more information on how to do this, see
-   * http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#
-   * specify-signature-version
-   * </p>
-   *
-   * @param bucketName
-   *          The name of the bucket containing the desired object.
-   * @param key
-   *          The key under which the desired object is stored.
-   *
-   * @return The object stored in Amazon S3 in the specified bucket and key.
-   *
-   * @throws SdkClientException
-   *           If any errors are encountered in the client while making the
-   *           request or handling the response.
-   *
-   */
-  public S3Object getObject(String bucketName, String key) {
-    try {
-      return s3Client.getObject(bucketName, key);
-    } catch (AmazonServiceException ex) {}
-    return null;
-  }
+  public Set<String> getFileList() {
+    Set<String> fileList = new HashSet<String>();
+    for(S3ObjectSummary os: s3Client.listObjectsV2(getBucket()).getObjectSummaries()) {
+      fileList.add(os.getKey());
+    }
+    return fileList;
+  }
+  public void removeFiles(Set<String> s3Files) {
+    for(String key : s3Files) {
+      try {
+        logger.info("Removing file: " + key);
+        s3Client.deleteObject(getBucket(), key);
+      } catch (AmazonServiceException ex) {
+        logger.warn(String.format("Unable to delete %s: %s", key, ex.getMessage()), ex);
+      }
+    }
+  }
   private String getPath(Path file) {
     if (file.getParent() == null) return file.getFileName().toString();
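One caveat on the new getFileList(): the single-argument listObjectsV2 call returns at most 1,000 keys per response, so on a larger bucket later pages would never land in s3Files; those files would be needlessly re-uploaded and their orphans never detected. A hedged sketch of a paginated variant using the v1 SDK's continuation token:

import java.util.HashSet;
import java.util.Set;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;

// Paginated bucket listing (sketch): loops on the continuation token until
// S3 reports the listing is no longer truncated.
public class ListAllKeys {
    public static Set<String> allKeys(AmazonS3 s3, String bucket) {
        Set<String> keys = new HashSet<>();
        ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket);
        ListObjectsV2Result result;
        do {
            result = s3.listObjectsV2(req);
            for (S3ObjectSummary os : result.getObjectSummaries()) {
                keys.add(os.getKey());
            }
            req.setContinuationToken(result.getNextContinuationToken());
        } while (result.isTruncated());
        return keys;
    }
}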