Added logic to check the S3 bucket to make sure each file exists, and to delete orphaned files
All checks were successful
Locusworks Team/aws-s3-sync/pipeline/head This commit looks good
All checks were successful
Locusworks Team/aws-s3-sync/pipeline/head This commit looks good
This commit is contained in:
74
pom.xml
74
pom.xml
@ -12,8 +12,82 @@
|
||||
<nexus.repo>https://nexus.locusworks.net</nexus.repo>
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<dep.check.version>6.0.2</dep.check.version>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-enforcer-plugin</artifactId>
  <version>3.0.0-M2</version>
  <!-- Plugin-level configuration: shared by the bound execution below and by a
       direct "mvn enforcer:enforce" invocation. -->
  <configuration>
    <rules>
      <dependencyConvergence />
    </rules>
  </configuration>
  <!-- Without an execution the enforce goal is never bound to the lifecycle, so
       the dependencyConvergence rule above would be configured but never run.
       NOTE(review): enabling the rule may surface existing convergence conflicts
       and fail the build; confirm no parent POM already binds this goal. -->
  <executions>
    <execution>
      <id>enforce-dependency-convergence</id>
      <goals>
        <goal>enforce</goal>
      </goals>
    </execution>
  </executions>
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.1</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.owasp</groupId>
|
||||
<artifactId>dependency-check-maven</artifactId>
|
||||
<version>${dep.check.version}</version>
|
||||
<configuration>
|
||||
<skipProvidedScope>true</skipProvidedScope>
|
||||
<skipTestScope>true</skipTestScope>
|
||||
<failOnError>false</failOnError>
|
||||
<versionCheckEnabled>true</versionCheckEnabled>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>check</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<finalName>S3Sync-${project.version}</finalName>
|
||||
<transformers>
|
||||
<transformer
|
||||
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>net.locusworks.s3sync.Entry</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.amazonaws</groupId>
|
||||
|
@ -4,6 +4,7 @@ import net.locusworks.logger.ApplicationLogger;
|
||||
import net.locusworks.logger.ApplicationLoggerFactory;
|
||||
import net.locusworks.logger.ApplicationLoggerInitializer;
|
||||
import net.locusworks.logger.LogLevel;
|
||||
import net.locusworks.s3sync.client.FileManager;
|
||||
import net.locusworks.s3sync.client.S3Client;
|
||||
import net.locusworks.s3sync.conf.ConfigurationManager;
|
||||
|
||||
@ -36,7 +37,9 @@ public class Entry {
|
||||
logger.info("Starting S3 Sync");
|
||||
|
||||
try (S3Client client = new S3Client(ConfigurationManager.getInstance())) {
|
||||
FileManager manager = FileManager.newInstance(client);
|
||||
client.syncFolder();
|
||||
manager.removeOrphanedFiles();
|
||||
} catch (Exception | Error e) {
|
||||
logger.error(e);
|
||||
System.exit(-1);
|
||||
|
@ -6,8 +6,10 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import com.amazonaws.services.s3.model.S3Object;
|
||||
@ -21,6 +23,8 @@ public class FileManager implements AutoCloseable {
|
||||
public static final String FILE_CSV = "upload.csv";
|
||||
|
||||
private Map<String, FileDetail> detailMap;
|
||||
|
||||
private Set<String> s3Files;
|
||||
|
||||
private S3Client client;
|
||||
|
||||
@ -30,19 +34,20 @@ public class FileManager implements AutoCloseable {
|
||||
|
||||
/**
 * Creates a manager bound to the given client and immediately loads state
 * through {@link #readFile()}.
 *
 * @param client S3 client used for all bucket access; its bucket name is cached
 * @throws IOException if {@code readFile()} fails
 */
private FileManager(S3Client client) throws IOException {
  this.client = client;
  this.bucket = client.getBucket();

  // Start from empty state; readFile() fills both collections when data exists.
  this.detailMap = new LinkedHashMap<>();
  this.s3Files = new HashSet<>();

  readFile();
}
|
||||
|
||||
private void readFile() throws IOException {
|
||||
|
||||
|
||||
S3Object hashFile = client.getObject(bucket, FILE_CSV);
|
||||
if (hashFile == null) return;
|
||||
Path file = Paths.get(FILE_CSV);
|
||||
|
||||
client.downloadFile(FILE_CSV, file);
|
||||
|
||||
s3Files = client.getFileList();
|
||||
if (Files.notExists(file)) return;
|
||||
|
||||
try(BufferedReader reader = Files.newBufferedReader(file)) {
|
||||
@ -77,25 +82,31 @@ public class FileManager implements AutoCloseable {
|
||||
return detailMap.put(fd.getFile(), fd) != null;
|
||||
}
|
||||
|
||||
public FileDetail getFileDetail(Path path) throws IOException {
|
||||
public FileDetail getFileDetail(Path path, String key) throws IOException {
|
||||
boolean newFile = false;
|
||||
String file = path.toString();
|
||||
FileDetail fd = null;
|
||||
if (detailMap.containsKey(file)) {
|
||||
fd = detailMap.get(file);
|
||||
if (detailMap.containsKey(key)) {
|
||||
fd = detailMap.get(key);
|
||||
} else {
|
||||
newFile = true;
|
||||
fd = new FileDetail(file, DigestUtils.sha1Hex(Files.newInputStream(path)), false);
|
||||
fd = new FileDetail(key, DigestUtils.sha1Hex(Files.newInputStream(path)), false);
|
||||
}
|
||||
|
||||
String sha1 = newFile ? fd.getHash() : DigestUtils.sha1Hex(Files.newInputStream(path));
|
||||
|
||||
if (!sha1.equals(fd.getHash())) {
|
||||
|
||||
if (sha1.equals(fd.getHash()) && s3Files.contains(key)) {
|
||||
fd.setUploaded(true);
|
||||
s3Files.remove(key);
|
||||
} else {
|
||||
fd.setUploaded(false);
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
public void removeOrphanedFiles() {
|
||||
client.removeFiles(s3Files);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {
|
||||
|
@ -3,16 +3,16 @@ package net.locusworks.s3sync.client;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import com.amazonaws.AmazonClientException;
|
||||
import com.amazonaws.AmazonServiceException;
|
||||
import com.amazonaws.SdkClientException;
|
||||
import com.amazonaws.regions.Regions;
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
|
||||
import com.amazonaws.services.s3.model.Bucket;
|
||||
import com.amazonaws.services.s3.model.GetObjectRequest;
|
||||
import com.amazonaws.services.s3.model.Permission;
|
||||
import com.amazonaws.services.s3.model.S3Object;
|
||||
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
||||
import com.amazonaws.services.s3.transfer.Download;
|
||||
import com.amazonaws.services.s3.transfer.TransferManager;
|
||||
import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
|
||||
@ -59,10 +59,11 @@ public class S3Client implements AutoCloseable {
|
||||
xferMgr = !xferMgrNull ? xferMgr : TransferManagerBuilder.standard().withS3Client(s3Client).build();
|
||||
FileDetail fd = null;
|
||||
try {
|
||||
fd = FileManager.getInstance().getFileDetail(file);
|
||||
String key = getPath(file);
|
||||
fd = FileManager.getInstance().getFileDetail(file, key);
|
||||
if (fd.isUploaded()) return;
|
||||
logger.info("Uploading file: %s", file);
|
||||
Upload xfer = xferMgr.upload(bucket, getPath(file), file.toFile());
|
||||
Upload xfer = xferMgr.upload(bucket, key, file.toFile());
|
||||
xfer.waitForCompletion();
|
||||
fd.setUploaded(true);
|
||||
FileManager.getInstance().addEntry(fd);
|
||||
@ -109,7 +110,7 @@ public class S3Client implements AutoCloseable {
|
||||
|
||||
public void syncFolder() throws IOException {
|
||||
TransferManager xferMgr = TransferManagerBuilder.standard().withS3Client(s3Client).build();
|
||||
try (FileManager manager = FileManager.newInstance(this)) {
|
||||
try (FileManager manager = FileManager.getInstance()) {
|
||||
Files.walk(syncFolder)
|
||||
.filter(f -> Files.isRegularFile(f))
|
||||
.forEach(f -> uploadFile(xferMgr, syncFolder.resolve(f)));
|
||||
@ -120,65 +121,31 @@ public class S3Client implements AutoCloseable {
|
||||
xferMgr.shutdownNow(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Gets the object stored in Amazon S3 under the specified bucket and key.
|
||||
* </p>
|
||||
* <p>
|
||||
* Be extremely careful when using this method; the returned Amazon S3
|
||||
* object contains a direct stream of data from the HTTP connection. The
|
||||
* underlying HTTP connection cannot be reused until the user finishes
|
||||
* reading the data and closes the stream. Also note that if not all data
|
||||
* is read from the stream then the SDK will abort the underlying connection,
|
||||
* this may have a negative impact on performance. Therefore:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>Use the data from the input stream in Amazon S3 object as soon as possible</li>
|
||||
* <li>Read all data from the stream (use {@link GetObjectRequest#setRange(long, long)} to request only the bytes you need)</li>
|
||||
* <li>Close the input stream in Amazon S3 object as soon as possible</li>
|
||||
* </ul>
|
||||
* If these rules are not followed, the client can run out of resources by
|
||||
* allocating too many open, but unused, HTTP connections. </p>
|
||||
* <p>
|
||||
* To get an object from Amazon S3, the caller must have
|
||||
* {@link Permission#Read} access to the object.
|
||||
* </p>
|
||||
* <p>
|
||||
* If the object fetched is publicly readable, it can also read it by
|
||||
* pasting its URL into a browser.
|
||||
* </p>
|
||||
* <p>
|
||||
* For more advanced options (such as downloading only a range of an
|
||||
* object's content, or placing constraints on when the object should be
|
||||
* downloaded) callers can use {@link #getObject(GetObjectRequest)}.
|
||||
* </p>
|
||||
* <p>
|
||||
* If you are accessing <a href="http://aws.amazon.com/kms/">AWS
|
||||
* KMS</a>-encrypted objects, you need to specify the correct region of the
|
||||
* bucket on your client and configure AWS Signature Version 4 for added
|
||||
* security. For more information on how to do this, see
|
||||
* http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#
|
||||
* specify-signature-version
|
||||
* </p>
|
||||
*
|
||||
* @param bucketName
|
||||
* The name of the bucket containing the desired object.
|
||||
* @param key
|
||||
* The key under which the desired object is stored.
|
||||
*
|
||||
* @return The object stored in Amazon S3 in the specified bucket and key.
|
||||
*
|
||||
* @throws SdkClientException
|
||||
* If any errors are encountered in the client while making the
|
||||
* request or handling the response.
|
||||
*
|
||||
*/
|
||||
public S3Object getObject(String bucketName, String key) {
|
||||
try {
|
||||
return s3Client.getObject(bucketName, key);
|
||||
} catch (AmazonServiceException ex) {}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Set<String> getFileList() {
|
||||
Set<String> fileList = new HashSet<String>();
|
||||
for(S3ObjectSummary os: s3Client.listObjectsV2(getBucket()).getObjectSummaries()) {
|
||||
fileList.add(os.getKey());
|
||||
}
|
||||
return fileList;
|
||||
}
|
||||
|
||||
public void removeFiles(Set<String> s3Files) {
|
||||
for(String key : s3Files) {
|
||||
try {
|
||||
logger.info("Removing file: " + key);
|
||||
s3Client.deleteObject(getBucket(), key);
|
||||
} catch (AmazonServiceException ex) {
|
||||
logger.warn(String.format("Unable to delete %s: %s", key, ex.getMessage()), ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String getPath(Path file) {
|
||||
if (file.getParent() == null) return file.getFileName().toString();
|
||||
|
Reference in New Issue
Block a user