diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e8dcbf4d..cab42377 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,9 +60,9 @@ jobs: runs-on: ubuntu-latest environment: ci env: - OCFL_TEST_AWS_ACCESS_KEY: ${{secrets.OCFL_TEST_AWS_ACCESS_KEY}} - OCFL_TEST_AWS_SECRET_KEY: ${{secrets.OCFL_TEST_AWS_SECRET_KEY}} - OCFL_TEST_S3_BUCKET: ${{secrets.OCFL_TEST_S3_BUCKET}} + OCFL_TEST_AWS_ACCESS_KEY: ${{ secrets.OCFL_TEST_AWS_ACCESS_KEY }} + OCFL_TEST_AWS_SECRET_KEY: ${{ secrets.OCFL_TEST_AWS_SECRET_KEY }} + OCFL_TEST_S3_BUCKET: ${{ secrets.OCFL_TEST_S3_BUCKET }} services: postgres: image: postgres:12 @@ -99,9 +99,9 @@ jobs: runs-on: ubuntu-latest environment: ci env: - OCFL_TEST_AWS_ACCESS_KEY: ${{secrets.OCFL_TEST_AWS_ACCESS_KEY}} - OCFL_TEST_AWS_SECRET_KEY: ${{secrets.OCFL_TEST_AWS_SECRET_KEY}} - OCFL_TEST_S3_BUCKET: ${{secrets.OCFL_TEST_S3_BUCKET}} + OCFL_TEST_AWS_ACCESS_KEY: ${{ secrets.OCFL_TEST_AWS_ACCESS_KEY }} + OCFL_TEST_AWS_SECRET_KEY: ${{ secrets.OCFL_TEST_AWS_SECRET_KEY }} + OCFL_TEST_S3_BUCKET: ${{ secrets.OCFL_TEST_S3_BUCKET }} services: mariadb: image: mariadb:10.6 diff --git a/CHANGELOG.md b/CHANGELOG.md index 67565375..ae355cec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ ### Fixed - `ObjectVersionId.equals()` no longer throws an error for HEAD versions: https://github.com/OCFL/ocfl-java/issues/110 +- Deleting an object in S3 that contains more than 1,000 files now works. +- Writing to files with identical content and writing the first file a second time to the same version no longer causes + the staged file to be erroneously deleted. + +### Changed + +- **Breaking:** A `S3AsyncClient` S3 client now must be used with ocfl-java-aws, and the sync version is no longer supported. +- ocfl-java-aws now uses the [S3 Transfer Manager](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/transfer-manager.html) + to upload and download files from S3. 
See the [usage guide](docs/USAGE.md#s3-transfer-manager) for more details. +- ocfl-java-aws now concurrently uploads files when writing an object to S3. This should improve object write performance. +- The `OcflObjectUpdater` was updated to be thread safe, enabling concurrently writing files to it. This _may_ speed up + writing a large number of files to an object. See the [usage guide](docs/USAGE.md#improving-write-performance) for + more details. ## [2.0.1] - 2024-03-01 diff --git a/docs/USAGE.md b/docs/USAGE.md index 80547088..3372bd1f 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -89,6 +89,10 @@ OCFL repository that supports the [mutable HEAD extension](https://ocfl.github.i most cloud storage, including S3, is now strongly consistent. Use `ObjectDetailsDatabaseBuilder` to construct an `ObjectDetailsDatabase`. +* **fileLockTimeoutDuration**: Configures the max amount of time to wait + for a file lock when updating an object from multiple threads. This + only matters if you concurrently write files to the same object, and + can otherwise be ignored. The default timeout is 1 minute. ## Storage Implementations @@ -165,8 +169,123 @@ on large files or objects with lots of files. Additionally, it does not cache any object files locally, requiring them to be retrieved from S3 on every access. +### S3 Transfer Manager + +`ocfl-java` uses the new [S3 Transfer +Manager](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/transfer-manager.html) +to upload and download files from S3. You can configure the transfer +manager to target a specific throughput, based on the needs of your +application. Consult the official documentation for details. + +However, note that it is **crucial** that you configure the transfer +manager to use the new [CRT S3 +client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html) +or wrap the old Netty async client in a `MultipartS3AsyncClient`. 
+The reason for this is because the transfer manager only supports +multipart uploads and downloads with the CRT client. However, you can +make multipart uploads work with the old client if it's wrapped in a +`MultipartS3AsyncClient`, but multipart downloads will still not work. + +Additionally, if you are using a 3rd party S3 implementation, you will +likely need to disable [object integrity +checks](https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html) +on the client that is used by the transfer manager. This is because +most/all 3rd party implementations do not support it, and it causes +the requests to fail. + +If you do not specify a transfer manager when constructing the +`OcflS3Client`, then it will create the default transfer manager using +the S3 client it was provided. When you use the default transfer +manager, you need to be sure to close the `OcflRepository` when you +are done with it, otherwise the transfer manager will not be closed. +Note that if you construct your own transfer manager, which is +advisable so that you can configure it to your specifications, it does +not need to use the same S3 client as the one already specified on +`OcflS3Client` but it can. For example, maybe you only want to use the +CRT client in the transfer manager, and you want to run everything +else through the regular client. + +If you are using the CRT client, then you need to add +`software.amazon.awssdk.crt:aws-crt` to your project, and create the +client similar to this, for the default settings: + +``` java +S3AsyncClient.crtBuilder().build(); +``` + +If you are using the Netty async client, then you don't need to add +any additional dependencies, and you'd create the client similar to +this, for the default settings: + +``` java +MultipartS3AsyncClient.create( + S3AsyncClient.builder().build(), + MultipartConfiguration.builder().build()); +``` + +Note the use of `MultipartS3AsyncClient`. Very important! 
+ +If you are using a 3rd party S3 implementation and need to disable the +object integrity check, then you can do so as follows: + +``` java +S3AsyncClient.crtBuilder().checksumValidationEnabled(false).build(); +``` + +Unfortunately, this is harder to do if you use the Netty client +wrapped in `MultipartS3AsyncClient`. As of this writing, it must be +disabled per-request as follows: + +``` java +OcflS3Client.builder() + .bucket(bucket) + .s3Client(MultipartS3AsyncClient.create( + S3AsyncClient.builder().build(), + MultipartConfiguration.builder().build())) + .putObjectModifier( + (key, builder) -> builder.overrideConfiguration(override -> override.putExecutionAttribute( + AwsSignerExecutionAttribute.SERVICE_CONFIG, + S3Configuration.builder() + .checksumValidationEnabled(false) + .build()))) + .build(); +``` + ### Configuration +#### AWS SDK + +If you are using the [CRT +client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html), +remember to set `targetThroughputInGbps()` on the builder, which +controls the client's concurrency. + +If you are using the regular async Netty client, you will likely want +to set `connectionAcquisitionTimeout`, `writeTimeout`, `readTimeout`, +and `maxConcurrency`. This is critical because `ocfl-java` queues +concurrent writes, and Netty needs to be configured to handle your +application's load. An example configuration looks something like: + +``` java +S3AsyncClient.builder() + .region(Region.US_EAST_2) + .httpClientBuilder(NettyNioAsyncHttpClient.builder() + .connectionAcquisitionTimeout(Duration.ofSeconds(60)) + .writeTimeout(Duration.ofSeconds(120)) + .readTimeout(Duration.ofSeconds(60)) + .maxConcurrency(100)) + .build(); +``` + +If you see failures related to acquiring a connection from the pool, +then you either need to increase the concurrency, increase the +acquisition timeout, or both. + +That said, it is generally recommended to use the CRT client. 
It is +easier to configure and seems to have better performance. + +#### ocfl-java + Use `OcflStorageBuilder.builder()` to create and configure an `OcflStorage` instance. @@ -211,6 +330,42 @@ default in-memory lock. Additionally, you may want to either adjust or disable inventory caching, or hook up a distributed cache implementation. +### Improving write performance + +If your objects have a lot of files, then you _might_ get better +performance by parallelizing file reads and writes. Parallel writes +are only supported as of `ocfl-java` 2.1.0 or later. `ocfl-java` does +not do this for you automatically, but the following is some example +code of one possible way that you could implement parallel writes +to an object: + +```java +repo.updateObject(ObjectVersionId.head(objectId), null, updater -> { + List> futures; + + try (var files = Files.find(sourceDir, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + futures = files.map(file -> executor.submit(() -> updater.addPath( + file, sourceDir.relativize(file).toString()))) + .collect(Collectors.toList()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + futures.forEach(future -> { + try { + future.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); +}); +``` + +The key bit here is that you use an `ExecutorService` to add multiple +files to the object at the same. You would likely want to use one thread +pool per object. Additionally, note that this technique will likely +make writes _slower_ if you are not writing a lot of files. 
+ ### Inventory size OCFL inventory files can grow quite large when an object has lots of diff --git a/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java b/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java index a2221803..fe95e0d2 100644 --- a/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java +++ b/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java @@ -34,6 +34,9 @@ /** * Exposes methods for selectively updating a specific OCFL object. + *

+ * Implementations are thread safe, and you can concurrently use the same updater to add multiple files to the same + * object version. */ public interface OcflObjectUpdater { @@ -42,7 +45,7 @@ public interface OcflObjectUpdater { * it's a directory, the contents of the directory are inserted into the object's root. * *

By default, files are copied into the OCFL repository. If {@link OcflOption#MOVE_SOURCE} is specified, then - * files will be moved instead. Warning: If an exception occurs and the new version is not created, the files that were + * files will be moved instead. Warning: If an exception occurs and the new version is not created, the files that * will be lost. This operation is more efficient but less safe than the default copy. * *

By default, the change will be rejected if there is an existing file in an object at a logical path. diff --git a/ocfl-java-aws/pom.xml b/ocfl-java-aws/pom.xml index c35b03a3..474c0e8c 100644 --- a/ocfl-java-aws/pom.xml +++ b/ocfl-java-aws/pom.xml @@ -66,6 +66,10 @@ software.amazon.awssdk s3 + + software.amazon.awssdk + s3-transfer-manager + org.codehaus.woodstox stax2-api @@ -86,6 +90,11 @@ + + software.amazon.awssdk.crt + aws-crt + test + org.junit.jupiter junit-jupiter diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 3eb868eb..559ee817 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -24,9 +24,8 @@ package io.ocfl.aws; -import com.google.common.annotations.VisibleForTesting; +import io.ocfl.api.OcflRepository; import io.ocfl.api.exception.OcflIOException; -import io.ocfl.api.exception.OcflInputException; import io.ocfl.api.util.Enforce; import io.ocfl.core.storage.cloud.CloudClient; import io.ocfl.core.storage.cloud.CloudObjectKey; @@ -36,43 +35,36 @@ import io.ocfl.core.util.UncheckedFiles; import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.function.BiConsumer; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import 
software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; -import software.amazon.awssdk.services.s3.model.CompletedPart; -import software.amazon.awssdk.services.s3.model.CopyObjectRequest; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.Delete; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; import software.amazon.awssdk.services.s3.model.NoSuchBucketException; import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; -import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.transfer.s3.S3TransferManager; /** * CloudClient implementation that uses Amazon's S3 synchronous v2 client @@ -81,30 +73,16 @@ public class OcflS3Client implements CloudClient { private static final Logger LOG = 
LoggerFactory.getLogger(OcflS3Client.class); - private static final int KB = 1024; - private static final int MB = 1024 * KB; - private static final long GB = 1024 * MB; - private static final long TB = 1024 * GB; - - private static final long MAX_FILE_BYTES = 5 * TB; - - private static final int MAX_PART_BYTES = 100 * MB; - private static final int PART_SIZE_BYTES = 10 * MB; - - private static final int MAX_PARTS = 100; - private static final int PART_SIZE_INCREMENT = 10; - private static final int PARTS_INCREMENT = 100; - - private final S3Client s3Client; + private final S3AsyncClient s3Client; + private final S3TransferManager transferManager; private final String bucket; private final String repoPrefix; private final CloudObjectKey.Builder keyBuilder; private final BiConsumer putObjectModifier; - private final BiConsumer createMultipartModifier; - private int maxPartBytes = MAX_PART_BYTES; - private int partSizeBytes = PART_SIZE_BYTES; + private final boolean shouldCloseManager; + private final boolean useMultipartDownload; /** * Used to create a new OcflS3Client instance. 
@@ -121,31 +99,41 @@ public static Builder builder() { * @param s3Client aws sdk s3 client * @param bucket s3 bucket */ - public OcflS3Client(S3Client s3Client, String bucket) { + public OcflS3Client(S3AsyncClient s3Client, String bucket) { this(s3Client, bucket, null, null, null); } /** * @see OcflS3Client#builder() * - * @param s3Client aws sdk s3 client - * @param bucket s3 bucket - * @param prefix key prefix - * @param putObjectModifier hook for modifying putObject requests - * @param createMultipartModifier hook for modifying createMultipartUpload requests + * @param s3Client aws sdk s3 client, not null + * @param bucket s3 bucket, not null + * @param prefix key prefix, may be null + * @param transferManager aws sdk s3 transfer manager, may be null + * @param putObjectModifier hook for modifying putObject requests, may be null */ public OcflS3Client( - S3Client s3Client, + S3AsyncClient s3Client, String bucket, String prefix, - BiConsumer putObjectModifier, - BiConsumer createMultipartModifier) { - this.s3Client = Enforce.notNull(s3Client, "s3Client cannot be null"); + S3TransferManager transferManager, + BiConsumer putObjectModifier) { + Enforce.notNull(s3Client, "s3Client cannot be null"); this.bucket = Enforce.notBlank(bucket, "bucket cannot be blank"); this.repoPrefix = sanitizeRepoPrefix(prefix == null ? "" : prefix); + this.shouldCloseManager = transferManager == null; + this.transferManager = transferManager == null + ? S3TransferManager.builder().s3Client(s3Client).build() + : transferManager; this.keyBuilder = CloudObjectKey.builder().prefix(repoPrefix); this.putObjectModifier = putObjectModifier != null ? putObjectModifier : (k, b) -> {}; - this.createMultipartModifier = createMultipartModifier != null ? 
createMultipartModifier : (k, b) -> {}; + // This hacky nonsense is needed until MultipartS3AsyncClient supports downloads + this.useMultipartDownload = !(s3Client instanceof MultipartS3AsyncClient); + if (s3Client instanceof MultipartS3AsyncClient) { + this.s3Client = (S3AsyncClient) ((MultipartS3AsyncClient) s3Client).delegate(); + } else { + this.s3Client = s3Client; + } } private static String sanitizeRepoPrefix(String repoPrefix) { @@ -161,6 +149,16 @@ private static int indexLastNonSlash(String string) { return 0; } + /** + * {@inheritDoc} + */ + @Override + public void close() { + if (shouldCloseManager) { + transferManager.close(); + } + } + /** * {@inheritDoc} */ @@ -181,93 +179,52 @@ public String prefix() { * {@inheritDoc} */ @Override - public CloudObjectKey uploadFile(Path srcPath, String dstPath) { - return uploadFile(srcPath, dstPath, null); + public Future uploadFileAsync(Path srcPath, String dstPath) { + return uploadFileAsync(srcPath, dstPath, null); } /** * {@inheritDoc} */ @Override - public CloudObjectKey uploadFile(Path srcPath, String dstPath, String contentType) { + public Future uploadFileAsync(Path srcPath, String dstPath, String contentType) { var fileSize = UncheckedFiles.size(srcPath); var dstKey = keyBuilder.buildFromPath(dstPath); - if (fileSize >= MAX_FILE_BYTES) { - throw new OcflInputException( - String.format("Cannot store file %s because it exceeds the maximum file size.", srcPath)); - } - - if (fileSize > maxPartBytes) { - multipartUpload(srcPath, dstKey, fileSize, contentType); - } else { - LOG.debug("Uploading {} to bucket {} key {} size {}", srcPath, bucket, dstKey, fileSize); + LOG.debug("Uploading {} to bucket {} key {} size {}", srcPath, bucket, dstKey, fileSize); - var builder = PutObjectRequest.builder().contentType(contentType); + var builder = PutObjectRequest.builder().contentType(contentType); - putObjectModifier.accept(dstKey.getKey(), builder); + putObjectModifier.accept(dstKey.getKey(), builder); - 
s3Client.putObject( - builder.bucket(bucket) - .key(dstKey.getKey()) - .contentLength(fileSize) - .build(), - srcPath); - } + var upload = transferManager.uploadFile(req -> req.source(srcPath) + .putObjectRequest(builder.bucket(bucket).key(dstKey.getKey()).build()) + .build()); - return dstKey; + return new UploadFuture(upload, srcPath, dstKey); } - // TODO reduce memory consumption? - private void multipartUpload(Path srcPath, CloudObjectKey dstKey, long fileSize, String contentType) { - var partSize = determinePartSize(fileSize); - - LOG.debug( - "Multipart upload of {} to bucket {} key {}. File size: {}; part size: {}", - srcPath, - bucket, - dstKey, - fileSize, - partSize); - - var uploadId = beginMultipartUpload(dstKey, contentType); - - var completedParts = new ArrayList(); + /** + * {@inheritDoc} + */ + @Override + public CloudObjectKey uploadFile(Path srcPath, String dstPath) { + return uploadFile(srcPath, dstPath, null); + } + /** + * {@inheritDoc} + */ + @Override + public CloudObjectKey uploadFile(Path srcPath, String dstPath, String contentType) { + var future = uploadFileAsync(srcPath, dstPath, contentType); try { - try (var channel = FileChannel.open(srcPath, StandardOpenOption.READ)) { - var buffer = ByteBuffer.allocate(partSize); - var i = 1; - - while (channel.read(buffer) > 0) { - buffer.flip(); - - var partResponse = s3Client.uploadPart( - UploadPartRequest.builder() - .bucket(bucket) - .key(dstKey.getKey()) - .uploadId(uploadId) - .partNumber(i) - // TODO entire part is in memory. stream part to file first? 
- .build(), - RequestBody.fromByteBuffer(buffer)); - - completedParts.add(CompletedPart.builder() - .partNumber(i) - .eTag(partResponse.eTag()) - .build()); - - buffer.clear(); - i++; - } - } catch (IOException e) { - throw new OcflIOException(e); - } - - completeMultipartUpload(uploadId, dstKey, completedParts); - } catch (RuntimeException e) { - abortMultipartUpload(uploadId, dstKey); - throw e; + return future.get(); + } catch (ExecutionException e) { + throw (RuntimeException) e.getCause(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new OcflS3Exception("Failed ot upload " + srcPath, e); } } @@ -277,13 +234,18 @@ private void multipartUpload(Path srcPath, CloudObjectKey dstKey, long fileSize, @Override public CloudObjectKey uploadBytes(String dstPath, byte[] bytes, String contentType) { var dstKey = keyBuilder.buildFromPath(dstPath); - LOG.debug("Writing string to bucket {} key {}", bucket, dstKey); + LOG.debug("Writing bytes to bucket {} key {}", bucket, dstKey); var builder = PutObjectRequest.builder().contentType(contentType); putObjectModifier.accept(dstKey.getKey(), builder); - s3Client.putObject(builder.bucket(bucket).key(dstKey.getKey()).build(), RequestBody.fromBytes(bytes)); + try { + s3Client.putObject(builder.bucket(bucket).key(dstKey.getKey()).build(), AsyncRequestBody.fromBytes(bytes)) + .join(); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to upload bytes to " + dstKey, OcflS3Util.unwrapCompletionEx(e)); + } return dstKey; } @@ -299,77 +261,23 @@ public CloudObjectKey copyObject(String srcPath, String dstPath) { LOG.debug("Copying {} to {} in bucket {}", srcKey, dstKey, bucket); try { - s3Client.copyObject(CopyObjectRequest.builder() - .destinationBucket(bucket) - .destinationKey(dstKey.getKey()) - .sourceBucket(bucket) - .sourceKey(srcKey.getKey()) + var copy = transferManager.copy(req -> req.copyObjectRequest(copyReq -> copyReq.destinationBucket(bucket) + 
.destinationKey(dstKey.getKey()) + .sourceBucket(bucket) + .sourceKey(srcKey.getKey()) + .build()) .build()); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(e); - } catch (SdkException e) { - // TODO verify class and message - if (e.getMessage().contains("copy source is larger than the maximum allowable size")) { - multipartCopy(srcKey, dstKey); - } else { - throw e; - } - } - - return dstKey; - } - - private void multipartCopy(CloudObjectKey srcKey, CloudObjectKey dstKey) { - var head = headObject(srcKey); - var fileSize = head.contentLength(); - var partSize = determinePartSize(fileSize); - - LOG.debug( - "Multipart copy of {} to {} in bucket {}: File size {}; part size: {}", - srcKey, - dstKey, - bucket, - fileSize, - partSize); - - var uploadId = beginMultipartUpload(dstKey, null); - try { - var completedParts = new ArrayList(); - var part = 1; - var position = 0L; - - while (position < fileSize) { - var end = Math.min(fileSize - 1, part * partSize - 1); - var partResponse = s3Client.uploadPartCopy(UploadPartCopyRequest.builder() - .destinationBucket(bucket) - .destinationKey(dstKey.getKey()) - .sourceBucket(bucket) - .sourceKey(srcKey.getKey()) - .partNumber(part) - .uploadId(uploadId) - .copySourceRange(String.format("bytes=%s-%s", position, end)) - .build()); - - completedParts.add(CompletedPart.builder() - .partNumber(part) - .eTag(partResponse.copyPartResult().eTag()) - .build()); - - part++; - position = end + 1; - } - - completeMultipartUpload(uploadId, dstKey, completedParts); + copy.completionFuture().join(); } catch (RuntimeException e) { - abortMultipartUpload(uploadId, dstKey); - throw e; + var cause = OcflS3Util.unwrapCompletionEx(e); + if (wasNotFound(cause)) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to copy object from " + srcKey + " to " + dstKey, cause); } - } - private HeadObjectResponse headObject(CloudObjectKey key) { - return 
s3Client.headObject( - HeadObjectRequest.builder().bucket(bucket).key(key.getKey()).build()); + return dstKey; } /** @@ -378,17 +286,33 @@ private HeadObjectResponse headObject(CloudObjectKey key) { @Override public Path downloadFile(String srcPath, Path dstPath) { var srcKey = keyBuilder.buildFromPath(srcPath); - LOG.debug("Downloading bucket {} key {} to {}", bucket, srcKey, dstPath); + LOG.debug("Downloading from bucket {} key {} to {}", bucket, srcKey, dstPath); try { - s3Client.getObject( - GetObjectRequest.builder() - .bucket(bucket) - .key(srcKey.getKey()) - .build(), - dstPath); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(e); + if (useMultipartDownload) { + transferManager + .downloadFile(req -> req.getObjectRequest(getReq -> getReq.bucket(bucket) + .key(srcKey.getKey()) + .build()) + .destination(dstPath) + .build()) + .completionFuture() + .join(); + } else { + s3Client.getObject( + GetObjectRequest.builder() + .bucket(bucket) + .key(srcKey.getKey()) + .build(), + dstPath) + .join(); + } + } catch (RuntimeException e) { + var cause = OcflS3Util.unwrapCompletionEx(e); + if (wasNotFound(cause)) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to download " + srcKey + " to " + dstPath, cause); } return dstPath; @@ -400,15 +324,22 @@ public Path downloadFile(String srcPath, Path dstPath) { @Override public InputStream downloadStream(String srcPath) { var srcKey = keyBuilder.buildFromPath(srcPath); - LOG.debug("Streaming bucket {} key {}", bucket, srcKey); + LOG.debug("Streaming from bucket {} key {}", bucket, srcKey); try { - return s3Client.getObject(GetObjectRequest.builder() - .bucket(bucket) - .key(srcKey.getKey()) - .build()); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(String.format("Key %s not found in bucket %s.", srcKey, bucket), e); + return s3Client.getObject( + GetObjectRequest.builder() + .bucket(bucket) + 
.key(srcKey.getKey()) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join(); + } catch (RuntimeException e) { + var cause = OcflS3Util.unwrapCompletionEx(e); + if (wasNotFound(cause)) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to download " + srcKey, cause); } } @@ -432,16 +363,23 @@ public HeadResult head(String path) { var key = keyBuilder.buildFromPath(path); try { - var s3Result = s3Client.headObject( - HeadObjectRequest.builder().bucket(bucket).key(key.getKey()).build()); + var s3Result = s3Client.headObject(HeadObjectRequest.builder() + .bucket(bucket) + .key(key.getKey()) + .build()) + .join(); return new HeadResult() .setContentEncoding(s3Result.contentEncoding()) .setContentLength(s3Result.contentLength()) .setETag(s3Result.eTag()) .setLastModified(s3Result.lastModified()); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(String.format("Key %s not found in bucket %s.", key, bucket), e); + } catch (RuntimeException e) { + var cause = OcflS3Util.unwrapCompletionEx(e); + if (wasNotFound(cause)) { + throw new KeyNotFoundException("Key " + key + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to HEAD " + key, cause); } } @@ -484,15 +422,20 @@ public boolean directoryExists(String path) { LOG.debug("Checking existence of {} in bucket {}", prefix, bucket); - var response = s3Client.listObjectsV2(ListObjectsV2Request.builder() - .bucket(bucket) - .delimiter("/") - .prefix(prefix) - .maxKeys(1) - .build()); - - return response.contents().stream().findAny().isPresent() - || response.commonPrefixes().stream().findAny().isPresent(); + try { + var response = s3Client.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .delimiter("/") + .prefix(prefix) + .maxKeys(1) + .build()) + .join(); + + return response.contents().stream().findAny().isPresent() + || 
response.commonPrefixes().stream().findAny().isPresent(); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to list objects under " + prefix, OcflS3Util.unwrapCompletionEx(e)); + } } /** @@ -532,10 +475,23 @@ private void deleteObjectsInternal(Collection objectKeys) { .map(key -> ObjectIdentifier.builder().key(key.getKey()).build()) .collect(Collectors.toList()); - s3Client.deleteObjects(DeleteObjectsRequest.builder() - .bucket(bucket) - .delete(Delete.builder().objects(objectIds).build()) - .build()); + try { + var futures = new ArrayList>(); + + // Can only delete at most 1,000 objects per request + for (int i = 0; i < objectIds.size(); i += 999) { + var toDelete = objectIds.subList(i, Math.min(objectIds.size(), i + 999)); + futures.add(s3Client.deleteObjects(DeleteObjectsRequest.builder() + .bucket(bucket) + .delete(builder -> builder.objects(toDelete)) + .build())); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[] {})) + .join(); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to delete objects " + objectIds, OcflS3Util.unwrapCompletionEx(e)); + } } } @@ -565,79 +521,42 @@ public void safeDeleteObjects(Collection objectPaths) { @Override public boolean bucketExists() { try { - s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()) + .join(); return true; - } catch (NoSuchBucketException e) { - return false; - } - } - - private String beginMultipartUpload(CloudObjectKey key, String contentType) { - var builder = CreateMultipartUploadRequest.builder().contentType(contentType); - - createMultipartModifier.accept(key.getKey(), builder); - - return s3Client.createMultipartUpload( - builder.bucket(bucket).key(key.getKey()).build()) - .uploadId(); - } - - private void completeMultipartUpload(String uploadId, CloudObjectKey key, List parts) { - s3Client.completeMultipartUpload(CompleteMultipartUploadRequest.builder() 
- .bucket(bucket) - .key(key.getKey()) - .uploadId(uploadId) - .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build()) - .build()); - } - - private void abortMultipartUpload(String uploadId, CloudObjectKey key) { - try { - s3Client.abortMultipartUpload(AbortMultipartUploadRequest.builder() - .bucket(bucket) - .key(key.getKey()) - .uploadId(uploadId) - .build()); } catch (RuntimeException e) { - LOG.error("Failed to abort multipart upload. Bucket: {}; Key: {}; Upload Id: {}", bucket, key, uploadId, e); - } - } - - private int determinePartSize(long fileSize) { - var partSize = partSizeBytes; - var maxParts = MAX_PARTS; - - while (fileSize / partSize > maxParts) { - partSize += PART_SIZE_INCREMENT; - - if (partSize > maxPartBytes) { - maxParts += PARTS_INCREMENT; - partSize /= 2; + var cause = OcflS3Util.unwrapCompletionEx(e); + if (wasNotFound(cause)) { + return false; } + throw new OcflS3Exception("Failed ot HEAD bucket " + bucket, cause); } - - return partSize; } private ListResult toListResult(ListObjectsV2Request.Builder requestBuilder) { - var result = s3Client.listObjectsV2(requestBuilder.build()); + try { + var result = s3Client.listObjectsV2(requestBuilder.build()).join(); - var prefixLength = prefixLength(result.prefix()); - var repoPrefixLength = repoPrefix.isBlank() ? 0 : repoPrefix.length() + 1; + var prefixLength = prefixLength(result.prefix()); + var repoPrefixLength = repoPrefix.isBlank() ? 
0 : repoPrefix.length() + 1; - var objects = toObjectListings(result, prefixLength); - var dirs = toDirectoryListings(result, repoPrefixLength); + var objects = toObjectListings(result, prefixLength); + var dirs = toDirectoryListings(result, repoPrefixLength); - while (Boolean.TRUE.equals(result.isTruncated())) { - result = s3Client.listObjectsV2(requestBuilder - .continuationToken(result.nextContinuationToken()) - .build()); + while (Boolean.TRUE.equals(result.isTruncated())) { + result = s3Client.listObjectsV2(requestBuilder + .continuationToken(result.nextContinuationToken()) + .build()) + .join(); - objects.addAll(toObjectListings(result, prefixLength)); - dirs.addAll(toDirectoryListings(result, repoPrefixLength)); - } + objects.addAll(toObjectListings(result, prefixLength)); + dirs.addAll(toDirectoryListings(result, repoPrefixLength)); + } - return new ListResult().setObjects(objects).setDirectories(dirs); + return new ListResult().setObjects(objects).setDirectories(dirs); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to list objects", OcflS3Util.unwrapCompletionEx(e)); + } } private List toObjectListings(ListObjectsV2Response result, int prefixLength) { @@ -672,35 +591,89 @@ private int prefixLength(String prefix) { return prefixLength; } - @VisibleForTesting - void setMaxPartBytes(int maxPartBytes) { - this.maxPartBytes = maxPartBytes; - } - - @VisibleForTesting - void setPartSizeBytes(int partSizeBytes) { - this.partSizeBytes = partSizeBytes; + /** + * Returns true if the exception indicates the object/bucket was NOT found in S3. + * + * @param e the exception + * @return true if the object/bucket was NOT found in S3. + */ + private boolean wasNotFound(Throwable e) { + if (e instanceof NoSuchKeyException || e instanceof NoSuchBucketException) { + return true; + } else if (e instanceof S3Exception) { + // It seems like the CRT client does not return NoSuchKeyExceptions... 
+ var s3e = (S3Exception) e; + return 404 == s3e.statusCode(); + } + return false; } public static class Builder { - private S3Client s3Client; + private S3AsyncClient s3Client; + private S3TransferManager transferManager; private String bucket; private String repoPrefix; private BiConsumer putObjectModifier; - private BiConsumer createMultipartModifier; /** - * The AWS SDK s3 client. Required. + * The AWS SDK S3 client. Required. + *

+ * If a {@link #transferManager(S3TransferManager)} is not specified, then the client specified here will be + * used to create a default transfer manager. If you specify a transfer manager, it does not need to use the + * same client as the one specified here. However, when creating a client to be used by the transfer manager, + * it is important to understand the following gotchas. + *

+ * The client used by the transfer manager MUST either be the CRT client + * or the regular S3AsyncClient wrapped in {@link software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient} + * in order for multipart uploads to work. Otherwise, files will be uploaded in single PUT requests. Additionally, + * only the CRT client supports multipart downloads. + *

+ * If you are using a 3rd party S3 implementation, then you will likely additionally need to disable the + * object integrity check + * as most 3rd party implementations do not support it. This easy to do on the CRT client builder by setting + * {@code checksumValidationEnabled()} to {@code false}. + *

+ * This client is NOT closed when the repository is closed, and the user is responsible for closing it when appropriate. + *

+ *

{@code
+         * // Please refer to the official documentation to properly configure your client.
+         * // When using the CRT client, create it something like this:
+         * S3AsyncClient.crtBuilder().build();
          *
+         * // When using the regular async client, create it something like this:
+         * MultipartS3AsyncClient.create(
+         *         S3AsyncClient.builder().build(),
+         *         MultipartConfiguration.builder().build());
+         * // The important part here is that you use the MultipartS3AsyncClient wrapper!
+         * }
* @param s3Client s3 client * @return builder */ - public Builder s3Client(S3Client s3Client) { + public Builder s3Client(S3AsyncClient s3Client) { this.s3Client = Enforce.notNull(s3Client, "s3Client cannot be null"); return this; } + /** + * The AWS SDK S3 transfer manager. This only needs to be specified when you need to set specific settings, and, + * if it is specified, it can use the same S3 client as was supplied in {@link #s3Client(S3AsyncClient)}. + * Otherwise, when not specified, the default transfer manager is created using the provided S3 Client. + *

+ * Please refer to the docs on {@link #s3Client(S3AsyncClient)} for additional details on how the S3 client + * used by the transfer manager should be configured. + *

+ * When a transfer manager is provided, it will NOT be closed when the repository is closed, and the user is + * responsible for closing it when appropriate. + * + * @param transferManager S3 transfer manager + * @return builder + */ + public Builder transferManager(S3TransferManager transferManager) { + this.transferManager = Enforce.notNull(transferManager, "transferManager cannot be null"); + return this; + } + /** * The S3 bucket to use. Required. * @@ -739,28 +712,15 @@ public Builder putObjectModifier(BiConsumer pu } /** - * Provides a hook to modify createMultipartUpload requests before they are executed. It is intended to be used - * to set object attributes such as tags. - * - *

The first argument is the object key the request is for, and the second is the request builder to apply - * changes to. - * - * @param createMultipartModifier hook for modifying createMultipartUpload requests - * @return builder - */ - public Builder createMultipartModifier( - BiConsumer createMultipartModifier) { - this.createMultipartModifier = createMultipartModifier; - return this; - } - - /** - * Constructs a new OcflS3Client. s3Client and bucket must be set. + * Constructs a new {@link OcflS3Client}. {@link #s3Client(S3AsyncClient)} and {@link #bucket(String)} must be set. + *

+ * Remember to call {@link OcflRepository#close()} when you are done with the repository so that the default + * S3 transfer manager is closed. * * @return OcflS3Client */ public OcflS3Client build() { - return new OcflS3Client(s3Client, bucket, repoPrefix, putObjectModifier, createMultipartModifier); + return new OcflS3Client(s3Client, bucket, repoPrefix, transferManager, putObjectModifier); } } } diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java new file mode 100644 index 00000000..9e8bccf6 --- /dev/null +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +package io.ocfl.aws; + +import io.ocfl.api.exception.OcflJavaException; + +/** + * Captures an error that occurred while interacting with S3. + */ +public class OcflS3Exception extends OcflJavaException { + + public OcflS3Exception() {} + + public OcflS3Exception(String message) { + super(message); + } + + public OcflS3Exception(String message, Throwable cause) { + super(message, cause); + } + + public OcflS3Exception(Throwable cause) { + super(cause.getMessage(), cause); + } +} diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java new file mode 100644 index 00000000..0b136cbb --- /dev/null +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +package io.ocfl.aws; + +import java.util.concurrent.CompletionException; + +final class OcflS3Util { + + private OcflS3Util() {} + + /** + * If the exception is a CompletionException, then the exception's cause is returned. Otherwise, the exception + * itself is returned. + * + * @param e the exception + * @return the exception or its cause + */ + static Throwable unwrapCompletionEx(RuntimeException e) { + Throwable cause = e; + if (e instanceof CompletionException) { + cause = e.getCause(); + } + return cause; + } +} diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java new file mode 100644 index 00000000..ec10a110 --- /dev/null +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java @@ -0,0 +1,87 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +package io.ocfl.aws; + +import io.ocfl.core.storage.cloud.CloudObjectKey; +import java.nio.file.Path; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import software.amazon.awssdk.transfer.s3.model.FileUpload; + +/** + * Converts a FileUpload CompletionFuture into a regular Future. + */ +public class UploadFuture implements Future { + + private final FileUpload upload; + private final Path srcPath; + private final CloudObjectKey dstKey; + + public UploadFuture(FileUpload upload, Path srcPath, CloudObjectKey dstKey) { + this.upload = upload; + this.srcPath = srcPath; + this.dstKey = dstKey; + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return upload.completionFuture().cancel(mayInterruptIfRunning); + } + + @Override + public boolean isCancelled() { + return upload.completionFuture().isCancelled(); + } + + @Override + public boolean isDone() { + return upload.completionFuture().isDone(); + } + + @Override + public CloudObjectKey get() throws InterruptedException, ExecutionException { + try { + upload.completionFuture().get(); + } catch (RuntimeException e) { + throw new ExecutionException(new OcflS3Exception( + "Failed to upload " + srcPath + " to " + dstKey, OcflS3Util.unwrapCompletionEx(e))); + } + return dstKey; + } + + @Override + public CloudObjectKey get(long timeout, TimeUnit unit) + throws InterruptedException, ExecutionException, TimeoutException { + try { + upload.completionFuture().get(timeout, unit); + } catch (RuntimeException e) { + throw new ExecutionException(new OcflS3Exception( + "Failed to upload " + srcPath + " to " + dstKey, OcflS3Util.unwrapCompletionEx(e))); + } + return dstKey; + } +} diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java index dcb3e885..5f80030d 100644 --- 
a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java @@ -7,13 +7,14 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static software.amazon.awssdk.http.SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES; -import at.favre.lib.bytes.Bytes; import com.adobe.testing.s3mock.junit5.S3MockExtension; import io.ocfl.core.storage.cloud.KeyNotFoundException; import io.ocfl.core.storage.cloud.ListResult; import io.ocfl.core.util.FileUtil; import java.io.IOException; +import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; @@ -23,6 +24,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -32,11 +34,16 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import 
software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; +import software.amazon.awssdk.utils.AttributeMap; public class OcflS3ClientTest { @@ -48,7 +55,7 @@ public class OcflS3ClientTest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client awsS3Client; + private static S3AsyncClient awsS3Client; private static OcflS3Client client; private static String bucket; @@ -62,21 +69,37 @@ public static void beforeAll() { var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET"); if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { - LOG.info("Running tests against AWS"); - awsS3Client = S3Client.builder() + LOG.warn("Running tests against AWS"); + awsS3Client = S3AsyncClient.crtBuilder() .region(Region.US_EAST_2) .credentialsProvider( StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))) - .httpClientBuilder(ApacheHttpClient.builder()) .build(); OcflS3ClientTest.bucket = bucket; } else { - LOG.info("Running tests against S3 Mock"); - awsS3Client = S3_MOCK.createS3ClientV2(); + LOG.warn("Running tests against S3 Mock"); + awsS3Client = MultipartS3AsyncClient.create( + S3AsyncClient.builder() + .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) + .region(Region.US_EAST_2) + .credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(), + MultipartConfiguration.builder().build()); + ; OcflS3ClientTest.bucket = UUID.randomUUID().toString(); - awsS3Client.createBucket(request -> { - request.bucket(OcflS3ClientTest.bucket); - }); + awsS3Client + .createBucket(request -> { + 
request.bucket(OcflS3ClientTest.bucket); + }) + .join(); } client = OcflS3Client.builder() @@ -86,6 +109,12 @@ public static void beforeAll() { .build(); } + @AfterAll + public static void afterAll() { + awsS3Client.close(); + client.close(); + } + @AfterEach public void after() { client.deletePath(""); @@ -123,82 +152,26 @@ public void putObjectWithModification() throws IOException { assertObjectsExist(bucket, List.of(key1, key2)); - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) - .build(); - })) { + try (var response = resolveClient() + .getObject( + builder -> builder.bucket(bucket) + .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join()) { assertEquals("text/plain", response.response().contentType()); } - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) - .build(); - })) { + try (var response = resolveClient() + .getObject( + builder -> builder.bucket(bucket) + .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join()) { assertEquals("application/octet-stream", response.response().contentType()); } } - @Test - public void multipartUpload() { - var size = 1024 * 1024 * 5; - client.setMaxPartBytes(size); - client.setPartSizeBytes(size); - - var key = "dir/sub/test.txt"; - - var byteString = Bytes.random(size + 100).encodeHex(); - - client.uploadFile(createFile(byteString), key); - - assertObjectsExist(bucket, List.of(key)); - - assertEquals(byteString, client.downloadString(key)); - } - - @Test - public void multipartUploadWithModification() throws IOException { - var client = OcflS3Client.builder() - .s3Client(awsS3Client) - .bucket(bucket) - .repoPrefix(REPO_PREFIX) - .createMultipartModifier((key, builder) -> { - if (key.endsWith("/test.txt")) { 
- builder.contentType("text/plain"); - } - }) - .build(); - var size = 1024 * 1024 * 5; - client.setMaxPartBytes(size); - client.setPartSizeBytes(size); - - var key1 = "dir/sub/test.txt"; - var key2 = "dir/sub/test.json"; - - var byteString = Bytes.random(size + 100).encodeHex(); - client.uploadFile(createFile(byteString), key1); - - byteString = Bytes.random(size + 100).encodeHex(); - client.uploadFile(createFile(byteString), key2); - - assertObjectsExist(bucket, List.of(key1, key2)); - - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) - .build(); - })) { - assertEquals("text/plain", response.response().contentType()); - } - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) - .build(); - })) { - assertEquals("binary/octet-stream", response.response().contentType()); - } - } - @Test public void basicDownloadFileWhenExists() throws IOException { var key = "dir/sub/test.txt"; @@ -424,10 +397,12 @@ private void assertObjectListingAll(String searchPrefix, String key, ListResult. 
} private void assertObjectsExist(String bucket, Collection expectedKeys) { - var result = awsS3Client.listObjectsV2(ListObjectsV2Request.builder() - .bucket(bucket) - .prefix(REPO_PREFIX) - .build()); + var result = awsS3Client + .listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(REPO_PREFIX) + .build()) + .join(); var actualKeys = result.contents().stream().map(S3Object::key).collect(Collectors.toList()); var prefixedExpected = expectedKeys.stream() @@ -436,4 +411,12 @@ private void assertObjectsExist(String bucket, Collection expectedKeys) assertThat(actualKeys, containsInAnyOrder(prefixedExpected.toArray(String[]::new))); } + + private S3AsyncClient resolveClient() { + if (awsS3Client instanceof MultipartS3AsyncClient) { + return (S3AsyncClient) ((MultipartS3AsyncClient) awsS3Client).delegate(); + } else { + return awsS3Client; + } + } } diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java index 9fa7071c..7ec03c6b 100644 --- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java @@ -4,6 +4,7 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static software.amazon.awssdk.http.SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES; import com.adobe.testing.s3mock.junit5.S3MockExtension; import io.ocfl.api.MutableOcflRepository; @@ -18,6 +19,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.Collection; @@ -26,6 +28,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import 
org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -35,11 +38,15 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; +import software.amazon.awssdk.utils.AttributeMap; public class OcflS3Test { @@ -54,7 +61,7 @@ public class OcflS3Test { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; private static CloudClient cloudClient; private static String bucket; @@ -68,21 +75,35 @@ public static void beforeAll() { var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET"); if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { - LOG.info("Running tests against AWS"); - s3Client = S3Client.builder() + LOG.warn("Running tests against AWS"); + s3Client = S3AsyncClient.crtBuilder() .region(Region.US_EAST_2) .credentialsProvider( StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))) - .httpClientBuilder(ApacheHttpClient.builder()) .build(); OcflS3Test.bucket = bucket; } else { - LOG.info("Running tests against S3 Mock"); - s3Client = 
S3_MOCK.createS3ClientV2(); + LOG.warn("Running tests against S3 Mock"); + s3Client = MultipartS3AsyncClient.create( + S3AsyncClient.builder() + .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) + .region(Region.US_EAST_2) + .credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(), + MultipartConfiguration.builder().build()); OcflS3Test.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(OcflS3Test.bucket); - }); + request.bucket(OcflS3Test.bucket); + }) + .join(); } cloudClient = OcflS3Client.builder() @@ -92,6 +113,12 @@ public static void beforeAll() { .build(); } + @AfterAll + public static void afterAll() { + s3Client.close(); + cloudClient.close(); + } + @AfterEach public void after() { cloudClient.deletePath(""); @@ -227,9 +254,10 @@ public void basicPurgeTest() { private void assertObjectsExist(String bucket, String prefix, Collection expectedKeys) { var result = s3Client.listObjectsV2(ListObjectsV2Request.builder() - .bucket(bucket) - .prefix(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, prefix)) - .build()); + .bucket(bucket) + .prefix(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, prefix)) + .build()) + .join(); var actualKeys = result.contents().stream().map(S3Object::key).collect(Collectors.toList()); var prefixedExpected = expectedKeys.stream() diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java index c3763bd2..6c4beb0f 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java @@ -41,6 +41,7 @@ 
import io.ocfl.core.util.FileUtil; import io.ocfl.core.util.UncheckedFiles; import java.nio.file.Path; +import java.time.Duration; import java.util.function.Consumer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,6 +59,12 @@ public class DefaultMutableOcflRepository extends DefaultOcflRepository implemen private static final Logger LOG = LoggerFactory.getLogger(DefaultMutableOcflRepository.class); + private final OcflConfig config; + private final Duration fileLockTimeoutDuration; + private final OcflStorage storage; + private final Path workDir; + private final ObjectLock objectLock; + /** * @see OcflRepositoryBuilder * @@ -69,6 +76,7 @@ public class DefaultMutableOcflRepository extends DefaultOcflRepository implemen * @param contentPathConstraintProcessor content path constraint processor * @param config ocfl defaults configuration * @param verifyStaging true if the contents of a stage version should be double-checked + * @param fileLockTimeoutDuration the max amount of time to wait for a file lock */ public DefaultMutableOcflRepository( OcflStorage storage, @@ -78,7 +86,8 @@ public DefaultMutableOcflRepository( LogicalPathMapper logicalPathMapper, ContentPathConstraintProcessor contentPathConstraintProcessor, OcflConfig config, - boolean verifyStaging) { + boolean verifyStaging, + Duration fileLockTimeoutDuration) { super( storage, workDir, @@ -87,7 +96,14 @@ public DefaultMutableOcflRepository( logicalPathMapper, contentPathConstraintProcessor, config, - verifyStaging); + verifyStaging, + fileLockTimeoutDuration); + this.storage = Enforce.notNull(storage, "storage cannot be null"); + this.workDir = Enforce.notNull(workDir, "workDir cannot be null"); + this.objectLock = Enforce.notNull(objectLock, "objectLock cannot be null"); + this.config = Enforce.notNull(config, "config cannot be null"); + this.fileLockTimeoutDuration = + Enforce.notNull(fileLockTimeoutDuration, "fileLockTimeoutDuration cannot be null"); } /** @@ -118,14 +134,16 @@ public 
ObjectVersionId stageChanges( .getParent(); var inventoryUpdater = inventoryUpdaterBuilder.buildCopyStateMutable(inventory); + var fileLocker = new FileLocker(fileLockTimeoutDuration); var addFileProcessor = - addFileProcessorBuilder.build(inventoryUpdater, contentDir, inventory.getDigestAlgorithm()); - var updater = new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor); + addFileProcessorBuilder.build(inventoryUpdater, fileLocker, contentDir, inventory.getDigestAlgorithm()); + var updater = + new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor, fileLocker); try { objectUpdater.accept(updater); var newInventory = buildNewInventory(inventoryUpdater, versionInfo); - writeNewVersion(newInventory, stagingDir, false); + writeNewVersion(newInventory, stagingDir, false, updater.checkForEmptyDirs()); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -213,7 +231,7 @@ private Inventory createAndPersistEmptyVersion(ObjectVersionId objectId) { .build()) .build(); - writeNewVersion(inventory, stagingDir, false); + writeNewVersion(inventory, stagingDir, false, false); return inventory; } finally { FileUtil.safeDeleteDirectory(stagingDir); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java index be7f4fc9..53668a73 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java @@ -45,16 +45,18 @@ import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.security.DigestInputStream; -import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; /** * Default implementation of OcflObjectUpdater that is used by DefaultOcflRepository to provide write access to an object. - * - *

This class is NOT thread safe. + *

+ * This class is thread safe, and you can concurrently use the same updater to add multiple files to the same + * object version. */ public class DefaultOcflObjectUpdater implements OcflObjectUpdater { @@ -64,20 +66,23 @@ public class DefaultOcflObjectUpdater implements OcflObjectUpdater { private final InventoryUpdater inventoryUpdater; private final Path stagingDir; private final AddFileProcessor addFileProcessor; - + private final FileLocker fileLocker; private final Map stagedFileMap; + private final AtomicBoolean checkForEmptyDirs; public DefaultOcflObjectUpdater( Inventory inventory, InventoryUpdater inventoryUpdater, Path stagingDir, - AddFileProcessor addFileProcessor) { + AddFileProcessor addFileProcessor, + FileLocker fileLocker) { this.inventory = Enforce.notNull(inventory, "inventory cannot be null"); this.inventoryUpdater = Enforce.notNull(inventoryUpdater, "inventoryUpdater cannot be null"); this.stagingDir = Enforce.notNull(stagingDir, "stagingDir cannot be null"); this.addFileProcessor = Enforce.notNull(addFileProcessor, "addFileProcessor cannot be null"); - - this.stagedFileMap = new HashMap<>(); + this.fileLocker = Enforce.notNull(fileLocker, "fileLocker cannot be null"); + this.stagedFileMap = new ConcurrentHashMap<>(); + this.checkForEmptyDirs = new AtomicBoolean(false); } @Override @@ -129,51 +134,53 @@ public OcflObjectUpdater writeFile(InputStream input, String destinationPath, Oc Enforce.notNull(input, "input cannot be null"); Enforce.notBlank(destinationPath, "destinationPath cannot be blank"); - LOG.debug("Write stream to object <{}> at logical path <{}>", inventory.getId(), destinationPath); + return fileLocker.withLock(destinationPath, () -> { + LOG.debug("Write stream to object <{}> at logical path <{}>", inventory.getId(), destinationPath); - var stagingFullPath = stagingFullPath(inventoryUpdater.innerContentPath(destinationPath)); + var stagingFullPath = stagingFullPath(inventoryUpdater.innerContentPath(destinationPath)); - var 
digestInput = wrapInDigestInputStream(input); - LOG.debug("Writing input stream to: {}", stagingFullPath); - if (Files.notExists(stagingFullPath.getParent())) { - UncheckedFiles.createDirectories(stagingFullPath.getParent()); - } - UncheckedFiles.copy(digestInput, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); - - if (input instanceof FixityCheckInputStream) { - try { - ((FixityCheckInputStream) input).checkFixity(); - } catch (FixityCheckException e) { - FileUtil.safeDelete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); - throw e; + var digestInput = wrapInDigestInputStream(input); + LOG.debug("Writing input stream to: {}", stagingFullPath); + if (Files.notExists(stagingFullPath.getParent())) { + UncheckedFiles.createDirectories(stagingFullPath.getParent()); + } + UncheckedFiles.copy(digestInput, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + + if (input instanceof FixityCheckInputStream) { + try { + ((FixityCheckInputStream) input).checkFixity(); + } catch (FixityCheckException e) { + FileUtil.safeDelete(stagingFullPath); + checkForEmptyDirs.set(true); + throw e; + } } - } - - String digest; - if (digestInput instanceof FixityCheckInputStream) { - digest = ((FixityCheckInputStream) digestInput) - .getActualDigestValue() - .get(); - } else { - digest = Bytes.wrap(digestInput.getMessageDigest().digest()).encodeHex(); - } + String digest; - var result = inventoryUpdater.addFile(digest, destinationPath, options); + if (digestInput instanceof FixityCheckInputStream) { + digest = ((FixityCheckInputStream) digestInput) + .getActualDigestValue() + .get(); + } else { + digest = Bytes.wrap(digestInput.getMessageDigest().digest()).encodeHex(); + } - if (!result.isNew()) { - LOG.debug( - "Deleting file <{}> because a file with same digest <{}> is already present in the object", - stagingFullPath, - digest); - UncheckedFiles.delete(stagingFullPath); - 
FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); - } else { - stagedFileMap.put(destinationPath, stagingFullPath); - } + var result = inventoryUpdater.addFile(digest, destinationPath, options); + + if (!result.isNew()) { + LOG.debug( + "Deleting file <{}> because a file with same digest <{}> is already present in the object", + stagingFullPath, + digest); + UncheckedFiles.delete(stagingFullPath); + checkForEmptyDirs.set(true); + } else { + stagedFileMap.put(destinationPath, stagingFullPath); + } - return this; + return this; + }); } /** @@ -183,12 +190,14 @@ public OcflObjectUpdater writeFile(InputStream input, String destinationPath, Oc public OcflObjectUpdater removeFile(String path) { Enforce.notBlank(path, "path cannot be blank"); - LOG.debug("Remove <{}> from object <{}>", path, inventory.getId()); + return fileLocker.withLock(path, () -> { + LOG.debug("Remove <{}> from object <{}>", path, inventory.getId()); - var results = inventoryUpdater.removeFile(path); - removeUnneededStagedFiles(results); + var results = inventoryUpdater.removeFile(path); + removeUnneededStagedFiles(results); - return this; + return this; + }); } /** @@ -199,12 +208,23 @@ public OcflObjectUpdater renameFile(String sourcePath, String destinationPath, O Enforce.notBlank(sourcePath, "sourcePath cannot be blank"); Enforce.notBlank(destinationPath, "destinationPath cannot be blank"); - LOG.debug("Rename file in object <{}> from <{}> to <{}>", inventory.getId(), sourcePath, destinationPath); + var lock1 = fileLocker.lock(sourcePath); + try { + var lock2 = fileLocker.lock(destinationPath); + try { + LOG.debug( + "Rename file in object <{}> from <{}> to <{}>", inventory.getId(), sourcePath, destinationPath); - var results = inventoryUpdater.renameFile(sourcePath, destinationPath, options); - removeUnneededStagedFiles(results); + var results = inventoryUpdater.renameFile(sourcePath, destinationPath, options); + removeUnneededStagedFiles(results); - return this; + 
return this; + } finally { + lock2.unlock(); + } + } finally { + lock1.unlock(); + } } /** @@ -217,12 +237,14 @@ public OcflObjectUpdater reinstateFile( Enforce.notBlank(sourcePath, "sourcePath cannot be blank"); Enforce.notBlank(destinationPath, "destinationPath cannot be blank"); - LOG.debug("Reinstate file at <{}> in object <{}> to <{}>", sourcePath, sourceVersionNum, destinationPath); + return fileLocker.withLock(destinationPath, () -> { + LOG.debug("Reinstate file at <{}> in object <{}> to <{}>", sourcePath, sourceVersionNum, destinationPath); - var results = inventoryUpdater.reinstateFile(sourceVersionNum, sourcePath, destinationPath, options); - removeUnneededStagedFiles(results); + var results = inventoryUpdater.reinstateFile(sourceVersionNum, sourcePath, destinationPath, options); + removeUnneededStagedFiles(results); - return this; + return this; + }); } /** @@ -244,47 +266,49 @@ public OcflObjectUpdater addFileFixity(String logicalPath, DigestAlgorithm algor Enforce.notNull(algorithm, "algorithm cannot be null"); Enforce.notBlank(value, "value cannot be null"); - LOG.debug( - "Add file fixity for file <{}> in object <{}>: Algorithm: {}; Value: {}", - logicalPath, - inventory.getId(), - algorithm.getOcflName(), - value); + return fileLocker.withLock(logicalPath, () -> { + LOG.debug( + "Add file fixity for file <{}> in object <{}>: Algorithm: {}; Value: {}", + logicalPath, + inventory.getId(), + algorithm.getOcflName(), + value); - var digest = inventoryUpdater.getFixityDigest(logicalPath, algorithm); - var alreadyExists = true; + var digest = inventoryUpdater.getFixityDigest(logicalPath, algorithm); + var alreadyExists = true; - if (digest == null) { - alreadyExists = false; + if (digest == null) { + alreadyExists = false; - if (!stagedFileMap.containsKey(logicalPath)) { - throw new OcflInputException(String.format( - "%s was not newly added in this update. 
Fixity information can only be added on new files.", - logicalPath)); - } + if (!algorithm.hasJavaStandardName()) { + throw new OcflInputException( + "The specified digest algorithm is not mapped to a Java name: " + algorithm); + } - if (!algorithm.hasJavaStandardName()) { - throw new OcflInputException( - "The specified digest algorithm is not mapped to a Java name: " + algorithm); - } + var file = stagedFileMap.get(logicalPath); - var file = stagedFileMap.get(logicalPath); + if (file == null) { + throw new OcflInputException(String.format( + "%s was not newly added in this update. Fixity information can only be added on new files.", + logicalPath)); + } - LOG.debug("Computing {} hash of {}", algorithm.getJavaStandardName(), file); - digest = DigestUtil.computeDigestHex(algorithm, file); - } + LOG.debug("Computing {} hash of {}", algorithm.getJavaStandardName(), file); + digest = DigestUtil.computeDigestHex(algorithm, file); + } - if (!value.equalsIgnoreCase(digest)) { - throw new FixityCheckException(String.format( - "Expected %s digest of %s to be %s, but was %s.", - algorithm.getJavaStandardName(), logicalPath, value, digest)); - } + if (!value.equalsIgnoreCase(digest)) { + throw new FixityCheckException(String.format( + "Expected %s digest of %s to be %s, but was %s.", + algorithm.getJavaStandardName(), logicalPath, value, digest)); + } - if (!alreadyExists) { - inventoryUpdater.addFixity(logicalPath, algorithm, digest); - } + if (!alreadyExists) { + inventoryUpdater.addFixity(logicalPath, algorithm, digest); + } - return this; + return this; + }); } /** @@ -297,6 +321,16 @@ public OcflObjectUpdater clearFixityBlock() { return this; } + /** + * Returns true if the processor deleted a file and thus we need to look for empty directories to delete prior to + * writing the version. 
+ * + * @return true if we need to look for empty directories + */ + public boolean checkForEmptyDirs() { + return checkForEmptyDirs.get() || addFileProcessor.checkForEmptyDirs(); + } + private void removeUnneededStagedFiles(Set removeFiles) { removeFiles.forEach(remove -> { var stagingPath = stagingFullPath(remove.getPathUnderContentDir()); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java index 62c0d97c..d82ddfc2 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java @@ -72,6 +72,7 @@ import java.nio.file.Path; import java.security.DigestOutputStream; import java.time.Clock; +import java.time.Duration; import java.time.OffsetDateTime; import java.util.HashMap; import java.util.HashSet; @@ -93,17 +94,17 @@ public class DefaultOcflRepository implements OcflRepository { private static final Logger LOG = LoggerFactory.getLogger(DefaultOcflRepository.class); + private final OcflConfig config; private final boolean verifyStaging; - protected final OcflStorage storage; - protected final InventoryMapper inventoryMapper; - protected final Path workDir; - protected final ObjectLock objectLock; - protected final ResponseMapper responseMapper; + private final Duration fileLockTimeoutDuration; + private final OcflStorage storage; + private final InventoryMapper inventoryMapper; + private final Path workDir; + private final ObjectLock objectLock; + private final ResponseMapper responseMapper; protected final InventoryUpdater.Builder inventoryUpdaterBuilder; protected final AddFileProcessor.Builder addFileProcessorBuilder; - protected final OcflConfig config; - private Clock clock; private final AtomicBoolean closed = new AtomicBoolean(false); @@ -119,6 +120,7 @@ public class DefaultOcflRepository implements OcflRepository { * @param contentPathConstraintProcessor content path 
constraint processor * @param config ocfl defaults configuration * @param verifyStaging true if the contents of a stage version should be double-checked + * @param fileLockTimeoutDuration the max amount of time to wait for a file lock */ public DefaultOcflRepository( OcflStorage storage, @@ -128,13 +130,16 @@ public DefaultOcflRepository( LogicalPathMapper logicalPathMapper, ContentPathConstraintProcessor contentPathConstraintProcessor, OcflConfig config, - boolean verifyStaging) { + boolean verifyStaging, + Duration fileLockTimeoutDuration) { this.storage = Enforce.notNull(storage, "storage cannot be null"); this.workDir = Enforce.notNull(workDir, "workDir cannot be null"); this.objectLock = Enforce.notNull(objectLock, "objectLock cannot be null"); this.inventoryMapper = Enforce.notNull(inventoryMapper, "inventoryMapper cannot be null"); this.config = Enforce.notNull(config, "config cannot be null"); this.verifyStaging = verifyStaging; + this.fileLockTimeoutDuration = + Enforce.notNull(fileLockTimeoutDuration, "fileLockTimeoutDuration cannot be null"); inventoryUpdaterBuilder = InventoryUpdater.builder() .contentPathMapperBuilder(ContentPathMapper.builder() @@ -170,14 +175,16 @@ public ObjectVersionId putObject( var stagingDir = createStagingDir(objectVersionId.getObjectId()); var contentDir = createStagingContentDir(inventory, stagingDir); - var fileProcessor = addFileProcessorBuilder.build(inventoryUpdater, contentDir, inventory.getDigestAlgorithm()); + var fileLocker = new FileLocker(fileLockTimeoutDuration); + var fileProcessor = + addFileProcessorBuilder.build(inventoryUpdater, fileLocker, contentDir, inventory.getDigestAlgorithm()); fileProcessor.processPath(path, options); var upgrade = inventoryUpdater.upgradeInventory(config); var newInventory = buildNewInventory(inventoryUpdater, versionInfo); try { - writeNewVersion(newInventory, stagingDir, upgrade); + writeNewVersion(newInventory, stagingDir, upgrade, fileProcessor.checkForEmptyDirs()); return 
ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -206,15 +213,17 @@ public ObjectVersionId updateObject( var contentDir = createStagingContentDir(inventory, stagingDir); var inventoryUpdater = inventoryUpdaterBuilder.buildCopyState(inventory); + var fileLocker = new FileLocker(fileLockTimeoutDuration); var addFileProcessor = - addFileProcessorBuilder.build(inventoryUpdater, contentDir, inventory.getDigestAlgorithm()); - var updater = new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor); + addFileProcessorBuilder.build(inventoryUpdater, fileLocker, contentDir, inventory.getDigestAlgorithm()); + var updater = + new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor, fileLocker); try { objectUpdater.accept(updater); var upgrade = inventoryUpdater.upgradeInventory(config); var newInventory = buildNewInventory(inventoryUpdater, versionInfo); - writeNewVersion(newInventory, stagingDir, upgrade); + writeNewVersion(newInventory, stagingDir, upgrade, updater.checkForEmptyDirs()); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -391,7 +400,7 @@ public ObjectVersionId replicateVersionAsHead(ObjectVersionId objectVersionId, V createStagingContentDir(inventory, stagingDir); try { - writeNewVersion(newInventory, stagingDir, upgrade); + writeNewVersion(newInventory, stagingDir, upgrade, false); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -624,10 +633,16 @@ private void getObjectInternal(Inventory inventory, VersionNum versionNum, Path } } - protected void writeNewVersion(Inventory inventory, Path stagingDir, boolean upgradedOcflVersion) { + protected void writeNewVersion( + Inventory inventory, Path stagingDir, boolean 
upgradedOcflVersion, boolean checkForEmptyDirs) { var finalInventory = writeInventory(inventory, stagingDir); var contentDir = stagingDir.resolve(inventory.resolveContentDirectory()); + + if (checkForEmptyDirs) { + FileUtil.deleteEmptyDirs(contentDir); + } + if (!FileUtil.hasChildren(contentDir)) { UncheckedFiles.delete(contentDir); } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java b/ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java new file mode 100644 index 00000000..d5f2d311 --- /dev/null +++ b/ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java @@ -0,0 +1,115 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +package io.ocfl.core; + +import io.ocfl.api.exception.LockException; +import io.ocfl.api.util.Enforce; +import io.ocfl.core.util.UncheckedCallable; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Provides locks for logical paths, so that an object may be safely modified by multiple threads. + */ +public class FileLocker { + + private static final Logger log = LoggerFactory.getLogger(FileLocker.class); + + private final Map locks; + private final long timeoutMillis; + + /** + * @param timeoutDuration the max amount of time to wait for a file lock + */ + public FileLocker(Duration timeoutDuration) { + this.timeoutMillis = Enforce.notNull(timeoutDuration, "timeoutDuration cannot be null") + .toMillis(); + locks = new ConcurrentHashMap<>(); + } + + /** + * Returns a lock on the specified logical path or throws a {@link LockException} if a lock was unable to be + * acquired. This lock MUST be released in a finally block. + * + * @param logicalPath the path to lock + * @return the lock + * @throws LockException when unable to acquire a lock + */ + public ReentrantLock lock(String logicalPath) { + var lock = locks.computeIfAbsent(logicalPath, k -> new ReentrantLock()); + log.debug("Acquiring lock on {}", logicalPath); + boolean acquired; + try { + acquired = lock.tryLock(timeoutMillis, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new LockException("Failed to acquire lock on file " + logicalPath, e); + } + if (acquired) { + log.debug("Acquired lock on {}", logicalPath); + return lock; + } else { + throw new LockException("Failed to acquire lock on file " + logicalPath); + } + } + + /** + * Executes the runnable after acquiring a lock on the specified logical path. 
If the lock cannot be acquired, + * a {@link LockException} is thrown. + * + * @param logicalPath the path to lock + * @throws LockException when unable to acquire a lock + */ + public void withLock(String logicalPath, Runnable runnable) { + var lock = lock(logicalPath); + try { + runnable.run(); + } finally { + lock.unlock(); + } + } + + /** + * Executes the callable after acquiring a lock on the specified logical path. If the lock cannot be acquired, + * a {@link LockException} is thrown. + * + * @param logicalPath the path to lock + * @return the output of the callable + * @throws LockException when unable to acquire a lock + */ + public T withLock(String logicalPath, UncheckedCallable callable) { + var lock = lock(logicalPath); + try { + return callable.call(); + } finally { + lock.unlock(); + } + } +} diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java b/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java index d8b9f085..5d84ed39 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java @@ -63,7 +63,7 @@ /** * Constructs a local file system based OCFL repository sensible defaults that can be overridden prior to calling - build(). + {@link #build()}. * *

Important: The same OcflRepositoryBuilder instance MUST NOT be used to initialize multiple repositories. */ @@ -74,6 +74,7 @@ public class OcflRepositoryBuilder { private OcflExtensionConfig defaultLayoutConfig; private Path workDir; private boolean verifyStaging; + private Duration fileLockTimeoutDuration; private ObjectLock objectLock; private Cache inventoryCache; @@ -86,7 +87,7 @@ public class OcflRepositoryBuilder { /** * Constructs a local file system based OCFL repository sensible defaults that can be overridden prior to calling - * build(). + * {@link #build()}. * *

Important: The same OcflRepositoryBuilder instance MUST NOT be used to initialize multiple repositories. */ @@ -103,6 +104,7 @@ public OcflRepositoryBuilder() { unsupportedBehavior = UnsupportedExtensionBehavior.FAIL; ignoreUnsupportedExtensions = Collections.emptySet(); verifyStaging = true; + fileLockTimeoutDuration = Duration.ofMinutes(1); } /** @@ -373,8 +375,24 @@ public OcflRepositoryBuilder verifyStaging(boolean verifyStaging) { return this; } + /** + * Configures the max amount of time to wait for a file lock when updating an object from multiple threads. This + * only matters if you concurrently write files to the same object, and can otherwise be ignored. The default + * timeout is 1 minute. + * + * @param fileLockTimeoutDuration the max amount of time to wait for a file lock + * @return builder + */ + public OcflRepositoryBuilder fileLockTimeoutDuration(Duration fileLockTimeoutDuration) { + this.fileLockTimeoutDuration = + Enforce.notNull(fileLockTimeoutDuration, "fileLockTimeoutDuration cannot be null"); + return this; + } + /** * Constructs an OCFL repository. Brand new repositories are initialized. + *

+ * Remember to call {@link OcflRepository#close()} when you are done with the repository. * * @return OcflRepository */ @@ -384,6 +402,8 @@ public OcflRepository build() { /** * Constructs an OCFL repository that allows the use of the Mutable HEAD Extension. Brand new repositories are initialized. + *

+ * Remember to call {@link OcflRepository#close()} when you are done with the repository. * * @return MutableOcflRepository */ @@ -418,7 +438,8 @@ private T buildInternal(Class clazz) { logicalPathMapper, contentPathConstraintProcessor, config, - verifyStaging)); + verifyStaging, + fileLockTimeoutDuration)); } return clazz.cast(new DefaultOcflRepository( @@ -429,7 +450,8 @@ private T buildInternal(Class clazz) { logicalPathMapper, contentPathConstraintProcessor, config, - verifyStaging)); + verifyStaging, + fileLockTimeoutDuration)); } private OcflStorage cache(OcflStorage storage) { diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java index ee31544b..ec495a41 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java @@ -29,6 +29,7 @@ import io.ocfl.api.exception.OcflIOException; import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.api.util.Enforce; +import io.ocfl.core.FileLocker; import io.ocfl.core.util.DigestUtil; import io.ocfl.core.util.FileUtil; import io.ocfl.core.util.UncheckedFiles; @@ -41,9 +42,11 @@ import java.nio.file.StandardCopyOption; import java.nio.file.StandardOpenOption; import java.security.DigestOutputStream; -import java.security.MessageDigest; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,9 +58,10 @@ public class AddFileProcessor { private static final Logger LOG = LoggerFactory.getLogger(AddFileProcessor.class); private final InventoryUpdater inventoryUpdater; + private final FileLocker fileLocker; private final Path stagingDir; private final DigestAlgorithm digestAlgorithm; - private final MessageDigest messageDigest; + 
private final AtomicBoolean checkForEmptyDirs; public static Builder builder() { return new Builder(); @@ -66,8 +70,11 @@ public static Builder builder() { public static class Builder { public AddFileProcessor build( - InventoryUpdater inventoryUpdater, Path stagingDir, DigestAlgorithm digestAlgorithm) { - return new AddFileProcessor(inventoryUpdater, stagingDir, digestAlgorithm); + InventoryUpdater inventoryUpdater, + FileLocker fileLocker, + Path stagingDir, + DigestAlgorithm digestAlgorithm) { + return new AddFileProcessor(inventoryUpdater, fileLocker, stagingDir, digestAlgorithm); } } @@ -78,11 +85,16 @@ public AddFileProcessor build( * @param stagingDir the staging directory to move files into * @param digestAlgorithm the digest algorithm */ - public AddFileProcessor(InventoryUpdater inventoryUpdater, Path stagingDir, DigestAlgorithm digestAlgorithm) { + public AddFileProcessor( + InventoryUpdater inventoryUpdater, + FileLocker fileLocker, + Path stagingDir, + DigestAlgorithm digestAlgorithm) { this.inventoryUpdater = Enforce.notNull(inventoryUpdater, "inventoryUpdater cannot be null"); + this.fileLocker = Enforce.notNull(fileLocker, "fileLocker cannot be null"); this.stagingDir = Enforce.notNull(stagingDir, "stagingDir cannot be null"); this.digestAlgorithm = Enforce.notNull(digestAlgorithm, "digestAlgorithm cannot be null"); - this.messageDigest = digestAlgorithm.getMessageDigest(); + this.checkForEmptyDirs = new AtomicBoolean(false); } /** @@ -110,15 +122,20 @@ public Map processPath(Path sourcePath, String destinationPath, Oc var results = new HashMap(); var optionsSet = OcflOption.toSet(options); + var isMove = optionsSet.contains(OcflOption.MOVE_SOURCE); var destination = destinationPath(destinationPath, sourcePath); + var messageDigest = digestAlgorithm.getMessageDigest(); + var locks = new ArrayList(); try (var paths = Files.find( sourcePath, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile(), FileVisitOption.FOLLOW_LINKS)) { - 
paths.forEach(file -> { + for (var it = paths.iterator(); it.hasNext(); ) { + var file = it.next(); messageDigest.reset(); var logicalPath = logicalPath(sourcePath, file, destination); + locks.add(fileLocker.lock(logicalPath)); - if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { + if (isMove) { var digest = DigestUtil.computeDigestHex(messageDigest, file); var result = inventoryUpdater.addFile(digest, logicalPath, options); @@ -164,15 +181,17 @@ public Map processPath(Path sourcePath, String destinationPath, Oc stagingFullPath, digest); UncheckedFiles.delete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); + checkForEmptyDirs.set(true); } } - }); + } } catch (IOException e) { throw new OcflIOException(e); + } finally { + locks.forEach(ReentrantLock::unlock); } - if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { + if (isMove) { // Cleanup empty dirs FileUtil.safeDeleteDirectory(sourcePath); } @@ -205,23 +224,36 @@ public Map processFileWithDigest( var destination = destinationPath(destinationPath, sourcePath); var logicalPath = logicalPath(sourcePath, sourcePath, destination); - var result = inventoryUpdater.addFile(digest, logicalPath, options); - if (result.isNew()) { - var stagingFullPath = stagingFullPath(result.getPathUnderContentDir()); + return fileLocker.withLock(logicalPath, () -> { + var result = inventoryUpdater.addFile(digest, logicalPath, options); - results.put(logicalPath, stagingFullPath); + if (result.isNew()) { + var stagingFullPath = stagingFullPath(result.getPathUnderContentDir()); - if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { - LOG.debug("Moving file <{}> to <{}>", sourcePath, stagingFullPath); - FileUtil.moveFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); - } else { - LOG.debug("Copying file <{}> to <{}>", sourcePath, stagingFullPath); - FileUtil.copyFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + 
results.put(logicalPath, stagingFullPath); + + if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { + LOG.debug("Moving file <{}> to <{}>", sourcePath, stagingFullPath); + FileUtil.moveFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + } else { + LOG.debug("Copying file <{}> to <{}>", sourcePath, stagingFullPath); + FileUtil.copyFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + } } - } - return results; + return results; + }); + } + + /** + * Returns true if the processor deleted a file and thus we need to look for empty directories to delete prior to + * writing the version. + * + * @return true if we need to look for empty directories + */ + public boolean checkForEmptyDirs() { + return checkForEmptyDirs.get(); } private String destinationPath(String path, Path sourcePath) { diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java index 8cb93976..bf62409c 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java @@ -43,6 +43,8 @@ import java.time.OffsetDateTime; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; /** * This class is used to record changes to OCFL objects and construct an updated inventory. 
@@ -60,6 +62,8 @@ public class InventoryUpdater { private final ContentPathMapper contentPathMapper; private final PathConstraintProcessor logicalPathConstraints; + private final Lock lock = new ReentrantLock(); + public static Builder builder() { return new Builder(); } @@ -187,12 +191,17 @@ private InventoryUpdater( * @return new inventory */ public Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo versionInfo) { - return inventoryBuilder - .addHeadVersion(versionBuilder - .versionInfo(versionInfo) - .created(createdTimestamp) - .build()) - .build(); + lock.lock(); + try { + return inventoryBuilder + .addHeadVersion(versionBuilder + .versionInfo(versionInfo) + .created(createdTimestamp) + .build()) + .build(); + } finally { + lock.unlock(); + } } /** @@ -203,12 +212,20 @@ public Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo * @return true if the inventory is upgraded; false otherwise */ public boolean upgradeInventory(OcflConfig config) { - if (config.isUpgradeObjectsOnWrite() - && inventoryBuilder.getType().compareTo(config.getOcflVersion().getInventoryType()) < 0) { - inventoryBuilder.type(config.getOcflVersion().getInventoryType()); - return true; + lock.lock(); + try { + if (config.isUpgradeObjectsOnWrite() + && inventoryBuilder + .getType() + .compareTo(config.getOcflVersion().getInventoryType()) + < 0) { + inventoryBuilder.type(config.getOcflVersion().getInventoryType()); + return true; + } + return false; + } finally { + lock.unlock(); } - return false; } /** @@ -220,26 +237,43 @@ public boolean upgradeInventory(OcflConfig config) { * @return details about the file if it was added to the manifest */ public AddFileResult addFile(String fileId, String logicalPath, OcflOption... 
options) { - logicalPathConstraints.apply(logicalPath); - - overwriteProtection(logicalPath, options); - versionBuilder.validateNonConflictingPath(logicalPath); + lock.lock(); + try { + logicalPathConstraints.apply(logicalPath); + + overwriteProtection(logicalPath, options); + versionBuilder.validateNonConflictingPath(logicalPath); + + var oldFileId = versionBuilder.getFileId(logicalPath); + + if (fileId.equalsIgnoreCase(oldFileId)) { + var contentPath = contentPathMapper.fromLogicalPath(logicalPath); + if (inventoryBuilder.containsContentPath(contentPath)) { + // This means that the exact same file was added multiple times and it is being used as the source + // of the file content + return new AddFileResult(contentPath, pathUnderContentDir(contentPath)); + } + } - if (versionBuilder.containsLogicalPath(logicalPath)) { - var oldFileId = versionBuilder.removeLogicalPath(logicalPath); - removeFileFromManifest(oldFileId); - } + // This is the case when the same logical path was added multiple times, but the content changed + if (oldFileId != null) { + versionBuilder.removeLogicalPath(logicalPath); + removeFileFromManifest(oldFileId); + } - String contentPath = null; + String contentPath = null; - if (!inventoryBuilder.containsFileId(fileId)) { - contentPath = contentPathMapper.fromLogicalPath(logicalPath); - inventoryBuilder.addFileToManifest(fileId, contentPath); - } + if (!inventoryBuilder.containsFileId(fileId)) { + contentPath = contentPathMapper.fromLogicalPath(logicalPath); + inventoryBuilder.addFileToManifest(fileId, contentPath); + } - versionBuilder.addFile(fileId, logicalPath); + versionBuilder.addFile(fileId, logicalPath); - return new AddFileResult(contentPath, pathUnderContentDir(contentPath)); + return new AddFileResult(contentPath, pathUnderContentDir(contentPath)); + } finally { + lock.unlock(); + } } /** @@ -261,16 +295,21 @@ public String innerContentPath(String logicalPath) { * @param digest the digest value */ public void addFixity(String 
logicalPath, DigestAlgorithm algorithm, String digest) { - if (algorithm.equals(inventory.getDigestAlgorithm())) { - return; - } + lock.lock(); + try { + if (algorithm.equals(inventory.getDigestAlgorithm())) { + return; + } - var fileId = versionBuilder.getFileId(logicalPath); + var fileId = versionBuilder.getFileId(logicalPath); - if (fileId != null) { - inventoryBuilder.getContentPaths(fileId).forEach(contentPath -> { - inventoryBuilder.addFixityForFile(contentPath, algorithm, digest); - }); + if (fileId != null) { + inventoryBuilder.getContentPaths(fileId).forEach(contentPath -> { + inventoryBuilder.addFixityForFile(contentPath, algorithm, digest); + }); + } + } finally { + lock.unlock(); } } @@ -282,25 +321,35 @@ public void addFixity(String logicalPath, DigestAlgorithm algorithm, String dige * @return the digest or null */ public String getFixityDigest(String logicalPath, DigestAlgorithm algorithm) { - if (inventory.getDigestAlgorithm().equals(algorithm)) { - return versionBuilder.getFileId(logicalPath); - } + lock.lock(); + try { + if (inventory.getDigestAlgorithm().equals(algorithm)) { + return versionBuilder.getFileId(logicalPath); + } - String digest = null; - var fileId = versionBuilder.getFileId(logicalPath); + String digest = null; + var fileId = versionBuilder.getFileId(logicalPath); - if (fileId != null) { - digest = inventoryBuilder.getFileFixity(fileId, algorithm); - } + if (fileId != null) { + digest = inventoryBuilder.getFileFixity(fileId, algorithm); + } - return digest; + return digest; + } finally { + lock.unlock(); + } } /** * Removes all entries from the fixity block. 
*/ public void clearFixity() { - inventoryBuilder.clearFixity(); + lock.lock(); + try { + inventoryBuilder.clearFixity(); + } finally { + lock.unlock(); + } } /** @@ -311,8 +360,13 @@ public void clearFixity() { * @return files that were removed from the manifest */ public Set removeFile(String logicalPath) { - var fileId = versionBuilder.removeLogicalPath(logicalPath); - return removeFileFromManifestWithResults(fileId); + lock.lock(); + try { + var fileId = versionBuilder.removeLogicalPath(logicalPath); + return removeFileFromManifestWithResults(fileId); + } finally { + lock.unlock(); + } } /** @@ -326,25 +380,30 @@ public Set removeFile(String logicalPath) { * @return files that were removed from the manifest */ public Set renameFile(String srcLogicalPath, String dstLogicalPath, OcflOption... options) { - logicalPathConstraints.apply(dstLogicalPath); + lock.lock(); + try { + logicalPathConstraints.apply(dstLogicalPath); - var srcDigest = versionBuilder.getFileId(srcLogicalPath); + var srcDigest = versionBuilder.getFileId(srcLogicalPath); - if (srcDigest == null) { - throw new OcflInputException( - String.format("The following path was not found in object %s: %s", objectId, srcLogicalPath)); - } + if (srcDigest == null) { + throw new OcflInputException( + String.format("The following path was not found in object %s: %s", objectId, srcLogicalPath)); + } - overwriteProtection(dstLogicalPath, options); - versionBuilder.validateNonConflictingPath(dstLogicalPath); + overwriteProtection(dstLogicalPath, options); + versionBuilder.validateNonConflictingPath(dstLogicalPath); - var dstFileId = versionBuilder.getFileId(dstLogicalPath); + var dstFileId = versionBuilder.getFileId(dstLogicalPath); - versionBuilder.removeLogicalPath(srcLogicalPath); - versionBuilder.removeLogicalPath(dstLogicalPath); - versionBuilder.addFile(srcDigest, dstLogicalPath); + versionBuilder.removeLogicalPath(srcLogicalPath); + versionBuilder.removeLogicalPath(dstLogicalPath); + 
versionBuilder.addFile(srcDigest, dstLogicalPath); - return removeFileFromManifestWithResults(dstFileId); + return removeFileFromManifestWithResults(dstFileId); + } finally { + lock.unlock(); + } } /** @@ -360,32 +419,42 @@ public Set renameFile(String srcLogicalPath, String dstLogical */ public Set reinstateFile( VersionNum sourceVersion, String srcLogicalPath, String dstLogicalPath, OcflOption... options) { - logicalPathConstraints.apply(dstLogicalPath); + lock.lock(); + try { + logicalPathConstraints.apply(dstLogicalPath); - var srcDigest = getDigestFromVersion(sourceVersion, srcLogicalPath); + var srcDigest = getDigestFromVersion(sourceVersion, srcLogicalPath); - if (srcDigest == null) { - throw new OcflInputException(String.format( - "Object %s version %s does not contain a file at %s", objectId, sourceVersion, srcLogicalPath)); - } + if (srcDigest == null) { + throw new OcflInputException(String.format( + "Object %s version %s does not contain a file at %s", objectId, sourceVersion, srcLogicalPath)); + } - overwriteProtection(dstLogicalPath, options); - versionBuilder.validateNonConflictingPath(dstLogicalPath); + overwriteProtection(dstLogicalPath, options); + versionBuilder.validateNonConflictingPath(dstLogicalPath); - var dstFileId = versionBuilder.getFileId(dstLogicalPath); + var dstFileId = versionBuilder.getFileId(dstLogicalPath); - versionBuilder.removeLogicalPath(dstLogicalPath); - versionBuilder.addFile(srcDigest, dstLogicalPath); + versionBuilder.removeLogicalPath(dstLogicalPath); + versionBuilder.addFile(srcDigest, dstLogicalPath); - return removeFileFromManifestWithResults(dstFileId); + return removeFileFromManifestWithResults(dstFileId); + } finally { + lock.unlock(); + } } /** * Removes all of the files from the version's state. 
*/ public void clearState() { - var state = new HashSet<>(versionBuilder.getInvertedState().keySet()); - state.forEach(this::removeFile); + lock.lock(); + try { + var state = new HashSet<>(versionBuilder.getInvertedState().keySet()); + state.forEach(this::removeFile); + } finally { + lock.unlock(); + } } private String getDigestFromVersion(VersionNum versionNum, String logicalPath) { diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java b/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java index 6decedcb..84e99155 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java @@ -95,6 +95,7 @@ private T doInLock(String objectId, Lock lock, Callable doInLock) { throw new LockException("Failed to acquire lock for object " + objectId); } } catch (InterruptedException e) { + Thread.currentThread().interrupt(); throw new LockException(e); } } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java index 39fd19db..1090a6d4 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java @@ -619,6 +619,7 @@ public ValidationResults validateObject(String objectId, boolean contentFixityCh public void close() { LOG.debug("Closing {}", this.getClass().getName()); super.close(); + storage.close(); } @Override diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java index 188bde5e..a34d48a3 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java @@ -27,12 +27,18 @@ import java.io.InputStream; import java.nio.file.Path; 
import java.util.Collection; +import java.util.concurrent.Future; /** * Wrapper interface abstracting cloud provider clients */ public interface CloudClient { + /** + * Close any resources the client may have created. This will NOT close resources that were passed into the client. + */ + void close(); + /** * The name of the bucket the OCFL repository is in. * @@ -48,6 +54,24 @@ public interface CloudClient { */ String prefix(); + /** + * Asynchronously uploads a file to the destination, and returns the object key. + * + * @param srcPath src file + * @param dstPath object path + * @return object key + */ + Future uploadFileAsync(Path srcPath, String dstPath); + + /** + * Asynchronously uploads a file to the destination, and returns the object key. + * + * @param srcPath src file + * @param dstPath object path + * @param contentType the content type of the data + * @return object key + */ + Future uploadFileAsync(Path srcPath, String dstPath, String contentType); /** * Uploads a file to the destination, and returns the object key. 
* diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java index 4ad731e7..8df31f07 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java @@ -27,6 +27,7 @@ import io.ocfl.api.OcflFileRetriever; import io.ocfl.api.exception.OcflFileAlreadyExistsException; import io.ocfl.api.exception.OcflIOException; +import io.ocfl.api.exception.OcflJavaException; import io.ocfl.api.exception.OcflNoSuchFileException; import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.api.util.Enforce; @@ -42,8 +43,8 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.List; +import java.util.concurrent.Future; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -239,16 +240,48 @@ public void copyFileInternal(String sourceFile, String destinationFile) { public void moveDirectoryInto(Path source, String destination) { failOnExistingDir(destination); - var objectKeys = Collections.synchronizedList(new ArrayList()); + var objectKeys = new ArrayList(); try (var paths = Files.find(source, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { - paths.forEach(file -> { - var relative = FileUtil.pathToStringStandardSeparator(source.relativize(file)); - var key = FileUtil.pathJoinFailEmpty(destination, relative); - client.uploadFile(file, key); - objectKeys.add(key); - }); + var hasErrors = false; + var interrupted = false; + var futures = new ArrayList>(); + + try { + for (var it = paths.iterator(); it.hasNext(); ) { + var file = it.next(); + var relative = FileUtil.pathToStringStandardSeparator(source.relativize(file)); + var key = FileUtil.pathJoinFailEmpty(destination, relative); + futures.add(client.uploadFileAsync(file, key)); + } + } catch (RuntimeException e) { + // If any of the 
uploads fail before the future is created, we want to short-circuit but still need + // to wait for the successfully started uploads to complete. + hasErrors = true; + LOG.error(e.getMessage(), e); + } + + for (var future : futures) { + try { + objectKeys.add(future.get().getKey()); + } catch (InterruptedException e) { + hasErrors = true; + interrupted = true; + } catch (Exception e) { + hasErrors = true; + LOG.error(e.getMessage(), e); + } + } + + if (interrupted) { + Thread.currentThread().interrupt(); + } + + if (hasErrors) { + throw new OcflJavaException("Failed to move files in " + source + " into " + destination); + } } catch (IOException | RuntimeException e) { + // If any of the files failed to upload, then we must delete everything. client.safeDeleteObjects(objectKeys); if (e instanceof IOException) { @@ -328,6 +361,14 @@ public void deleteEmptyDirsUp(String path) { // no-op } + /** + * {@inheritDoc} + */ + @Override + public void close() { + client.close(); + } + private void failOnExistingFile(String path) { if (fileExists(path)) { throw new OcflFileAlreadyExistsException(String.format("File %s already exists", path)); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java index 14f3cbe9..c5fc535a 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java @@ -188,4 +188,9 @@ public interface Storage { * @param path starting path */ void deleteEmptyDirsUp(String path); + + /** + * Closes any resources the storage implementation may have open. 
+ */ + void close(); } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java index 2e94dd92..58717660 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java @@ -347,4 +347,12 @@ public void deleteEmptyDirsUp(String path) { var fullPath = storageRoot.resolve(path); FileUtil.deleteDirAndParentsIfEmpty(fullPath); } + + /** + * {@inheritDoc} + */ + @Override + public void close() { + // no-op + } } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java b/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java index a1408ebe..d84e9cf4 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java @@ -222,7 +222,7 @@ public static void deleteChildren(Path root) { */ public static void deleteEmptyDirs(Path root) { try (var files = Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isDirectory())) { - files.filter(f -> !f.equals(root)).forEach(FileUtil::deleteDirIfEmpty); + files.filter(f -> !f.equals(root)).sorted(Comparator.reverseOrder()).forEach(FileUtil::deleteDirIfEmpty); } catch (NoSuchFileException e) { // ignore } catch (IOException e) { diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java b/ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java new file mode 100644 index 00000000..e1de42ac --- /dev/null +++ b/ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal 
+ * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package io.ocfl.core.util; + +@FunctionalInterface +public interface UncheckedCallable { + + V call(); +} diff --git a/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java b/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java index 86e0e9f1..9e9c61f7 100644 --- a/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java +++ b/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java @@ -97,9 +97,8 @@ public void shouldDeleteAllEmptyDirectories() throws IOException { FileUtil.deleteEmptyDirs(tempRoot); - assertThat(tempRoot.resolve("a/b/c").toFile(), anExistingDirectory()); + assertThat(tempRoot.resolve("a/b/c").toFile(), not(anExistingDirectory())); assertThat(tempRoot.resolve("a/c/file3").toFile(), anExistingFile()); - assertThat(tempRoot.resolve("a/b/c/d").toFile(), not(anExistingDirectory())); assertThat(tempRoot.resolve("a/d").toFile(), not(anExistingDirectory())); } diff --git a/ocfl-java-itest/pom.xml b/ocfl-java-itest/pom.xml index e4201f95..07629f4e 100644 --- a/ocfl-java-itest/pom.xml +++ 
b/ocfl-java-itest/pom.xml @@ -71,6 +71,11 @@ junit-jupiter test + + org.assertj + assertj-core + test + org.hamcrest hamcrest @@ -122,27 +127,13 @@ test - io.micrometer - micrometer-core - 1.12.3 - test - - - io.micrometer - micrometer-registry-prometheus - 1.12.3 - test - - - io.prometheus - simpleclient_httpserver - 0.16.0 + software.amazon.awssdk.crt + aws-crt test - io.prometheus - simpleclient - 0.16.0 + org.hdrhistogram + HdrHistogram test diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java index 22d8f1e4..f0123a52 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java @@ -1,11 +1,6 @@ package io.ocfl.itest; -import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.config.MeterFilter; -import io.micrometer.core.instrument.distribution.DistributionStatisticConfig; -import io.micrometer.prometheus.PrometheusConfig; -import io.micrometer.prometheus.PrometheusMeterRegistry; import io.ocfl.api.MutableOcflRepository; import io.ocfl.api.OcflRepository; import io.ocfl.api.model.ObjectVersionId; @@ -16,11 +11,9 @@ import io.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig; import io.ocfl.core.util.FileUtil; import io.ocfl.core.util.UncheckedFiles; -import io.prometheus.client.exporter.HTTPServer; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.UncheckedIOException; -import java.net.InetSocketAddress; import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; @@ -31,21 +24,17 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; +import org.HdrHistogram.Histogram; import org.junit.jupiter.api.Disabled; import 
org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; @Disabled public class LoadITest { - // AVG: rate(putObject_seconds_sum[1m])/rate(putObject_seconds_count[1m]) - // p99: histogram_quantile(0.99, sum(rate(putObject_seconds_bucket[1m])) by (le)) - private static final int KB = 1024; private static final long MB = 1024 * KB; @@ -54,44 +43,7 @@ public class LoadITest { @TempDir public Path tempRoot; - private static HTTPServer prometheusServer; - - @BeforeAll - public static void beforeAll() throws IOException { - var registry = new PrometheusMeterRegistry(new PrometheusConfig() { - @Override - public Duration step() { - return Duration.ofSeconds(30); - } - - @Override - public String get(final String key) { - return null; - } - }); - // Enables distribution stats for all timer metrics - registry.config().meterFilter(new MeterFilter() { - @Override - public DistributionStatisticConfig configure(final Meter.Id id, final DistributionStatisticConfig config) { - if (id.getType() == Meter.Type.TIMER) { - return DistributionStatisticConfig.builder() - .percentilesHistogram(true) - .percentiles(0.5, 0.90, 0.99) - .build() - .merge(config); - } - return config; - } - }); - Metrics.addRegistry(registry); - - prometheusServer = new HTTPServer(new InetSocketAddress(1234), registry.getPrometheusRegistry()); - } - - @AfterAll - public static void afterAll() { - prometheusServer.stop(); - } + private static final Histogram histogram = new Histogram(3600000000000L, 3); @Test public void fsPutObjectSmallFilesTest() throws InterruptedException { @@ -264,12 +216,11 @@ public void s3WriteTest() throws InterruptedException { var objectPath = createTestObject(1, 3 * MB); var prefix = 
UUID.randomUUID().toString(); - var s3Client = S3Client.builder() - .region(Region.US_EAST_2) - .httpClientBuilder(ApacheHttpClient.builder()) - .build(); + var s3Client = S3AsyncClient.crtBuilder().region(Region.US_EAST_2).build(); + var transferManager = S3TransferManager.builder().s3Client(s3Client).build(); var cloutClient = OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket("pwinckles-ocfl") .repoPrefix(prefix) .build(); @@ -293,6 +244,9 @@ public void s3WriteTest() throws InterruptedException { System.out.println("Finished. Waiting for metrics collection..."); TimeUnit.SECONDS.sleep(30); System.out.println("Done"); + + s3Client.close(); + transferManager.close(); } private void runPutTest( @@ -305,6 +259,7 @@ private void runPutTest( boolean shouldPurge) throws InterruptedException { System.out.println("Starting putTest"); + histogram.reset(); System.out.println("Creating test object"); var objectPath = createTestObject(fileCount, fileSize); @@ -313,24 +268,14 @@ private void runPutTest( var versionInfo = new VersionInfo().setUser("Peter", "pwinckles@example.com").setMessage("Testing"); - var timer = Metrics.timer( - "putObject", - "files", - String.valueOf(fileCount), - "sizeBytes", - String.valueOf(fileSize), - "threads", - String.valueOf(threadCount), - "storage", - storageType); - var threads = new ArrayList(threadCount); for (var i = 0; i < threadCount; i++) { threads.add(createThread(duration, objectId -> { - timer.record(() -> { - repo.putObject(ObjectVersionId.head(objectId), objectPath, versionInfo); - }); + var start = System.nanoTime(); + repo.putObject(ObjectVersionId.head(objectId), objectPath, versionInfo); + var end = System.nanoTime(); + histogram.recordValue(end - start); if (shouldPurge) { repo.purgeObject(objectId); } @@ -340,16 +285,18 @@ private void runPutTest( startThreads(threads); System.out.println("Waiting for threads to complete..."); joinThreads(threads); - - System.out.println("Finished. 
Waiting for metrics collection..."); - TimeUnit.SECONDS.sleep(30); System.out.println("Done"); + + System.out.printf( + "putTest results for %s files=%d size=%s threads=%s%n", storageType, fileSize, fileCount, threadCount); + histogram.outputPercentileDistribution(System.out, 1_000_000.0); } private void runGetTest( OcflRepository repo, int fileCount, long fileSize, int threadCount, Duration duration, String storageType) throws InterruptedException { System.out.println("Starting getTest"); + histogram.reset(); System.out.println("Creating test object"); var objectPath = createTestObject(fileCount, fileSize); @@ -362,25 +309,15 @@ private void runGetTest( repo.putObject(ObjectVersionId.head(objectId), objectPath, versionInfo); - var timer = Metrics.timer( - "getObject", - "files", - String.valueOf(fileCount), - "sizeBytes", - String.valueOf(fileSize), - "threads", - String.valueOf(threadCount), - "storage", - storageType); - var threads = new ArrayList(threadCount); for (var i = 0; i < threadCount; i++) { threads.add(createThread(duration, out -> { var outDir = tempRoot.resolve(out); - timer.record(() -> { - repo.getObject(ObjectVersionId.head(objectId), outDir); - }); + var start = System.nanoTime(); + repo.getObject(ObjectVersionId.head(objectId), outDir); + var end = System.nanoTime(); + histogram.recordValue(end - start); FileUtil.safeDeleteDirectory(outDir); })); } @@ -388,10 +325,11 @@ private void runGetTest( startThreads(threads); System.out.println("Waiting for threads to complete..."); joinThreads(threads); - - System.out.println("Finished. 
Waiting for metrics collection..."); - TimeUnit.SECONDS.sleep(30); System.out.println("Done"); + + System.out.printf( + "getTest results for %s files=%d size=%s threads=%s%n", storageType, fileSize, fileCount, threadCount); + histogram.outputPercentileDistribution(System.out, 1_000_000.0); } private Thread createThread(Duration duration, Consumer test) { @@ -445,10 +383,22 @@ private OcflRepository createFsRepo() { } private MutableOcflRepository createS3Repo() { - var s3Client = S3Client.builder() - .region(Region.US_EAST_2) - .httpClientBuilder(ApacheHttpClient.builder()) - .build(); + // var s3Client = S3AsyncClient.builder() + // .region(Region.US_EAST_2) + // .httpClientBuilder(NettyNioAsyncHttpClient.builder() + // .connectionAcquisitionTimeout(Duration.ofSeconds(60)) + // .writeTimeout(Duration.ofSeconds(0)) + // .readTimeout(Duration.ofSeconds(0)) + // .maxConcurrency(100)) + // .build(); + // var transferManager = S3TransferManager.builder() + // .s3Client(MultipartS3AsyncClient.create( + // s3Client, MultipartConfiguration.builder().build())) + // .build(); + + var s3Client = S3AsyncClient.crtBuilder().region(Region.US_EAST_2).build(); + var transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + var prefix = UUID.randomUUID().toString(); // Note this is NOT using a db, which an S3 setup would normally use return new OcflRepositoryBuilder() @@ -459,6 +409,7 @@ private MutableOcflRepository createS3Repo() { .bucket("pwinckles-ocfl") .repoPrefix(prefix) .s3Client(s3Client) + .transferManager(transferManager) .build()); }) .workDir(UncheckedFiles.createDirectories(tempRoot.resolve("temp"))) diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java index 18e9fa47..33effe3f 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java @@ -29,6 +29,7 @@ import 
java.nio.file.Paths; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.Phaser; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import org.hamcrest.Matchers; @@ -447,24 +448,26 @@ public void rejectUpdateWhenConcurrentChangeToPreviousVersionOfMutableHead() thr updater.writeFile(streamString("file2"), "file2.txt"); }); + var phaser = new Phaser(2); + var future = CompletableFuture.runAsync(() -> { repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> { - try { - TimeUnit.SECONDS.sleep(3); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); updater.writeFile(streamString("file3"), "file3.txt"); }); }); - TimeUnit.MILLISECONDS.sleep(100); + phaser.arriveAndAwaitAdvance(); repo.rollbackToVersion(ObjectVersionId.version(objectId, "v1")); repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> { updater.writeFile(streamString("file4"), "file4.txt"); }); + phaser.arriveAndAwaitAdvance(); + TimeUnit.MILLISECONDS.sleep(100); + OcflAsserts.assertThrowsWithMessage( ObjectOutOfSyncException.class, "Cannot update object o1 because the update is out of sync with the current object state. 
The digest of the current inventory is ", @@ -497,24 +500,26 @@ public void rejectUpdateWhenConcurrentChangeWhileCreatingMutableHead() throws In updater.writeFile(streamString("file2"), "file2.txt"); }); + var phaser = new Phaser(2); + var future = CompletableFuture.runAsync(() -> { repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> { - try { - TimeUnit.SECONDS.sleep(3); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); updater.writeFile(streamString("file3"), "file3.txt"); }); }); - TimeUnit.MILLISECONDS.sleep(100); + phaser.arriveAndAwaitAdvance(); repo.rollbackToVersion(ObjectVersionId.version(objectId, "v1")); repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> { updater.writeFile(streamString("file4"), "file4.txt"); }); + phaser.arriveAndAwaitAdvance(); + TimeUnit.MILLISECONDS.sleep(100); + OcflAsserts.assertThrowsWithMessage( ObjectOutOfSyncException.class, "Cannot update object o1 because the update is out of sync with the current object state. 
The digest of the current inventory is ", diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java index d2cda81f..9f83e534 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java @@ -76,6 +76,7 @@ import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.Phaser; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -2208,24 +2209,26 @@ public void rejectUpdateWhenConcurrentChangeToPreviousVersion() throws Interrupt updater.writeFile(ITestHelper.streamString("file2"), "file2.txt"); }); + var phaser = new Phaser(2); + var future = CompletableFuture.runAsync(() -> { repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> { - try { - TimeUnit.SECONDS.sleep(3); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); updater.writeFile(ITestHelper.streamString("file3"), "file3.txt"); }); }); - TimeUnit.MILLISECONDS.sleep(100); + phaser.arriveAndAwaitAdvance(); repo.rollbackToVersion(ObjectVersionId.version(objectId, "v1")); repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> { updater.writeFile(ITestHelper.streamString("file4"), "file4.txt"); }); + phaser.arriveAndAwaitAdvance(); + TimeUnit.MILLISECONDS.sleep(100); + OcflAsserts.assertThrowsWithMessage( ObjectOutOfSyncException.class, "Cannot update object o1 because the update is out of sync with the current object state. 
The digest of the current inventory is ", @@ -2790,7 +2793,7 @@ private void verifyStream(Path expectedFile, OcflObjectVersionFile actual) throw } } - private Path outputPath(String repoName, String path) { + protected Path outputPath(String repoName, String path) { try { var output = outputDir.resolve(Paths.get(repoName, path)); Files.createDirectories(output.getParent()); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java index 2ada8119..84c87538 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java @@ -7,7 +7,9 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import io.ocfl.api.OcflConstants; +import io.ocfl.api.OcflOption; import io.ocfl.api.OcflRepository; +import io.ocfl.api.exception.OcflInputException; import io.ocfl.api.model.ObjectVersionId; import io.ocfl.core.OcflRepositoryBuilder; import io.ocfl.core.cache.NoOpCache; @@ -29,8 +31,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.stream.Collectors; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledOnOs; import org.junit.jupiter.api.condition.OS; @@ -171,6 +178,147 @@ public void shouldNotCreateEmptyContentDirWhenVersionHasNoContent() { assertFalse(Files.exists(v2ContentPath), "empty content directories should not exist"); } + // There appears to be a bug with s3mock's copy object that makes this test fail for some reason + @Test + public void writeToObjectConcurrently() { + var repoName = "repo18"; + var repo = 
defaultRepo(repoName); + + var objectId = "o1"; + + var executor = Executors.newFixedThreadPool(10); + + try { + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("1"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString("file1".repeat(100)), "a/b/c/file1.txt", OcflOption.OVERWRITE); + })); + } + + for (int i = 0; i < 5; i++) { + var n = i; + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString(String.valueOf(n).repeat(100)), + String.format("a/b/c/d/%s.txt", n)); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("2"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + var errors = new AtomicInteger(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + try { + updater.renameFile("a/b/c/file1.txt", "a/b/c/file2.txt"); + } catch (OcflInputException e) { + errors.getAndIncrement(); + } + })); + } + + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/0.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/2.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("test".repeat(100)), "test.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.renameFile("a/b/c/d/4.txt", "a/b/c/d/1.txt", OcflOption.OVERWRITE); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("new".repeat(100)), "a/new.txt"); + })); + + joinFutures(futures); + + assertEquals(4, errors.get(), "4 out of 5 renames should have failed"); + }); + + 
repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("3"), updater -> { + var latch = new CountDownLatch(5); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.addPath(ITestHelper.expectedRepoPath("repo15"), "repo15", OcflOption.OVERWRITE); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("4"), updater -> { + var root = ITestHelper.expectedRepoPath("repo17"); + var futures = new ArrayList>(); + + try (var files = Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + files.map(file -> executor.submit(() -> updater.addPath( + file, "repo17/" + FileUtil.pathToStringStandardSeparator(root.relativize(file))))) + .forEach(futures::add); + } catch (IOException e) { + throw new RuntimeException(e); + } + + joinFutures(futures); + }); + + Assertions.assertThat(repo.validateObject(objectId, true).getErrors()) + .isEmpty(); + + var outputPath1 = outputPath(repoName, objectId + "v1"); + repo.getObject(ObjectVersionId.version(objectId, 1), outputPath1); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v1"), outputPath1); + + var outputPath2 = outputPath(repoName, objectId + "v2"); + repo.getObject(ObjectVersionId.version(objectId, 2), outputPath2); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v2"), outputPath2); + + var outputPath3 = outputPath(repoName, objectId + "v3"); + repo.getObject(ObjectVersionId.version(objectId, 3), outputPath3); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v3"), outputPath3); + + var outputPath4 = outputPath(repoName, objectId + "v4"); + repo.getObject(ObjectVersionId.version(objectId, 4), outputPath4); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v4"), outputPath4); + } finally 
{ + executor.shutdownNow(); + } + } + + private void joinFutures(List> futures) { + for (var future : futures) { + try { + future.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + @Override protected void onBefore() { reposDir = UncheckedFiles.createDirectories(tempRoot.resolve("repos")); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java index 700a7f7b..d4231726 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java @@ -17,11 +17,13 @@ import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.extension.RegisterExtension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3BadReposITest extends BadReposITest { @@ -33,7 +35,8 @@ public class S3BadReposITest extends BadReposITest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private static ComboPooledDataSource dataSource; @@ -48,24 +51,33 @@ public static void beforeAll() { var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET"); if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { - LOG.info("Running tests against AWS"); + LOG.warn("Running tests against AWS"); s3Client = S3ITestHelper.createS3Client(accessKey, secretKey); S3BadReposITest.bucket = bucket; } else { - 
LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + LOG.warn("Running tests against S3 Mock"); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3BadReposITest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3BadReposITest.bucket); - }); + request.bucket(S3BadReposITest.bucket); + }) + .join(); } + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + dataSource = new ComboPooledDataSource(); dataSource.setJdbcUrl(System.getProperty("db.url", "jdbc:h2:mem:test")); dataSource.setUser(System.getProperty("db.user", "")); dataSource.setPassword(System.getProperty("db.password", "")); } + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); + } + @Override protected void onBefore() { s3Helper = new S3ITestHelper(s3Client); @@ -119,6 +131,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java index 193c4ead..91a0d536 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java @@ -2,12 +2,14 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; +import static software.amazon.awssdk.http.SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES; import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.core.util.DigestUtil; import io.ocfl.core.util.FileUtil; import io.ocfl.itest.ITestHelper; import java.io.IOException; +import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -16,31 +18,66 @@ import org.junit.jupiter.api.Assertions; import 
software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; +import software.amazon.awssdk.utils.AttributeMap; public class S3ITestHelper { private static final String OCFL_SPEC_FILE = "ocfl_1.1.md"; - private S3Client s3Client; + private S3AsyncClient s3Client; - public S3ITestHelper(S3Client s3Client) { + public S3ITestHelper(S3AsyncClient s3Client) { this.s3Client = s3Client; } - public static S3Client createS3Client(String accessKey, String secretKey) { - return S3Client.builder() + public static S3AsyncClient createS3Client(String accessKey, String secretKey) { + return S3AsyncClient.crtBuilder() .region(Region.US_EAST_2) .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))) - .httpClientBuilder(ApacheHttpClient.builder()) .build(); } + public static S3AsyncClient createMockS3Client(String endpoint) { + return MultipartS3AsyncClient.create( + S3AsyncClient.builder() + .endpointOverride(URI.create(endpoint)) + .region(Region.US_EAST_2) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + 
.serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(), + MultipartConfiguration.builder().build()); + } + + /** + * This nonsense is needed if you're using the MultipartS3AsyncClient client and want to download a file + * + * @param client + * @return + */ + public static S3AsyncClient resolveClient(S3AsyncClient client) { + if (client instanceof MultipartS3AsyncClient) { + return (S3AsyncClient) ((MultipartS3AsyncClient) client).delegate(); + } else { + return client; + } + } + public void verifyRepo(Path expected, String bucket, String prefix) { var expectedPaths = listAllFiles(expected); var actualObjects = listAllObjects(bucket, prefix); @@ -80,14 +117,15 @@ private List listAllFiles(Path root) { } private byte[] getObjectContent(String bucket, String prefix, String key) { - try (var result = s3Client.getObject(GetObjectRequest.builder() - .bucket(bucket) - .key(prefix + "/" + key) - .build())) { - return result.readAllBytes(); - } catch (IOException e) { - throw new RuntimeException(e); - } + return resolveClient(s3Client) + .getObject( + GetObjectRequest.builder() + .bucket(bucket) + .key(prefix + "/" + key) + .build(), + AsyncResponseTransformer.toBytes()) + .join() + .asByteArray(); } private String computeS3Digest(String bucket, String prefix, String key) { @@ -95,8 +133,11 @@ private String computeS3Digest(String bucket, String prefix, String key) { } public List listAllObjects(String bucket, String prefix) { - var result = s3Client.listObjectsV2( - ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).build()); + var result = s3Client.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(prefix) + .build()) + .join(); return result.contents().stream() .map(S3Object::key) diff --git 
a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java index 3bd533d4..12944c90 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java @@ -21,11 +21,13 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.function.Consumer; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.extension.RegisterExtension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3MutableHeadITest extends MutableHeadITest { @@ -37,7 +39,8 @@ public class S3MutableHeadITest extends MutableHeadITest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private static ComboPooledDataSource dataSource; @@ -52,24 +55,33 @@ public static void beforeAll() { var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET"); if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { - LOG.info("Running tests against AWS"); + LOG.warn("Running tests against AWS"); s3Client = S3ITestHelper.createS3Client(accessKey, secretKey); S3MutableHeadITest.bucket = bucket; } else { - LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + LOG.warn("Running tests against S3 Mock"); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3MutableHeadITest.bucket = UUID.randomUUID().toString(); 
s3Client.createBucket(request -> { - request.bucket(S3MutableHeadITest.bucket); - }); + request.bucket(S3MutableHeadITest.bucket); + }) + .join(); } + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + dataSource = new ComboPooledDataSource(); dataSource.setJdbcUrl(System.getProperty("db.url", "jdbc:h2:mem:test")); dataSource.setUser(System.getProperty("db.user", "")); dataSource.setPassword(System.getProperty("db.password", "")); } + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); + } + @Override protected void onBefore() { s3Helper = new S3ITestHelper(s3Client); @@ -143,6 +155,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java index c1cb73f3..b503da27 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java @@ -8,7 +8,9 @@ import com.adobe.testing.s3mock.junit5.S3MockExtension; import com.mchange.v2.c3p0.ComboPooledDataSource; +import io.ocfl.api.OcflOption; import io.ocfl.api.OcflRepository; +import io.ocfl.api.exception.OcflInputException; import io.ocfl.api.model.ObjectVersionId; import io.ocfl.api.model.VersionInfo; import io.ocfl.aws.OcflS3Client; @@ -26,22 +28,31 @@ import io.ocfl.itest.OcflITest; import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.ThreadLocalRandom; +import 
java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import org.junit.jupiter.api.extension.RegisterExtension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3OcflITest extends OcflITest { @@ -57,7 +68,8 @@ public class S3OcflITest extends OcflITest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private static ComboPooledDataSource dataSource; @@ -72,24 +84,33 @@ public static void beforeAll() { var bucket = System.getenv().get(ENV_BUCKET); if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { - LOG.info("Running tests against AWS"); + LOG.warn("Running tests against AWS"); s3Client = S3ITestHelper.createS3Client(accessKey, secretKey); S3OcflITest.bucket = bucket; } else { - LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + LOG.warn("Running tests against S3 Mock"); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3OcflITest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3OcflITest.bucket); - }); + request.bucket(S3OcflITest.bucket); + }) + .join(); } + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + dataSource = new ComboPooledDataSource(); 
dataSource.setJdbcUrl(System.getProperty("db.url", "jdbc:h2:mem:test")); dataSource.setUser(System.getProperty("db.user", "")); dataSource.setPassword(System.getProperty("db.password", "")); } + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); + } + @Override protected void onBefore() { s3Helper = new S3ITestHelper(s3Client); @@ -222,6 +243,150 @@ public void hashedIdLayoutLongEncoded() { verifyRepo(repoName); } + // There appears to be a bug with s3mock's copy object that makes this test fail for some reason + @Test + @EnabledIfEnvironmentVariable(named = ENV_ACCESS_KEY, matches = ".+") + @EnabledIfEnvironmentVariable(named = ENV_SECRET_KEY, matches = ".+") + @EnabledIfEnvironmentVariable(named = ENV_BUCKET, matches = ".+") + public void writeToObjectConcurrently() { + var repoName = "repo18"; + var repo = defaultRepo(repoName); + + var objectId = "o1"; + + var executor = Executors.newFixedThreadPool(10); + + try { + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("1"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString("file1".repeat(100)), "a/b/c/file1.txt", OcflOption.OVERWRITE); + })); + } + + for (int i = 0; i < 5; i++) { + var n = i; + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString(String.valueOf(n).repeat(100)), + String.format("a/b/c/d/%s.txt", n)); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("2"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + var errors = new AtomicInteger(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + try { + updater.renameFile("a/b/c/file1.txt", 
"a/b/c/file2.txt"); + } catch (OcflInputException e) { + errors.getAndIncrement(); + } + })); + } + + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/0.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/2.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("test".repeat(100)), "test.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.renameFile("a/b/c/d/4.txt", "a/b/c/d/1.txt", OcflOption.OVERWRITE); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("new".repeat(100)), "a/new.txt"); + })); + + joinFutures(futures); + + assertEquals(4, errors.get(), "4 out of 5 renames should have failed"); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("3"), updater -> { + var latch = new CountDownLatch(5); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.addPath(ITestHelper.expectedRepoPath("repo15"), "repo15", OcflOption.OVERWRITE); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("4"), updater -> { + var root = ITestHelper.expectedRepoPath("repo17"); + var futures = new ArrayList>(); + + try (var files = Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + files.map(file -> executor.submit(() -> updater.addPath( + file, "repo17/" + FileUtil.pathToStringStandardSeparator(root.relativize(file))))) + .forEach(futures::add); + } catch (IOException e) { + throw new RuntimeException(e); + } + + joinFutures(futures); + }); + + Assertions.assertThat(repo.validateObject(objectId, true).getErrors()) + .isEmpty(); + + var outputPath1 = outputPath(repoName, objectId + "v1"); + 
repo.getObject(ObjectVersionId.version(objectId, 1), outputPath1); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v1"), outputPath1); + + var outputPath2 = outputPath(repoName, objectId + "v2"); + repo.getObject(ObjectVersionId.version(objectId, 2), outputPath2); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v2"), outputPath2); + + var outputPath3 = outputPath(repoName, objectId + "v3"); + repo.getObject(ObjectVersionId.version(objectId, 3), outputPath3); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v3"), outputPath3); + + var outputPath4 = outputPath(repoName, objectId + "v4"); + repo.getObject(ObjectVersionId.version(objectId, 4), outputPath4); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v4"), outputPath4); + } finally { + executor.shutdownNow(); + } + } + + private void joinFutures(List> futures) { + for (var future : futures) { + try { + future.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + @Override protected OcflRepository defaultRepo(String name, Consumer consumer) { var builder = new OcflRepositoryBuilder() @@ -276,6 +441,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java index 36722388..45e607af 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java @@ -13,11 +13,14 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import 
org.junit.jupiter.api.extension.RegisterExtension; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3StorageTest extends StorageTest { @@ -27,7 +30,8 @@ public class S3StorageTest extends StorageTest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private Set repoPrefixes = new HashSet<>(); @@ -36,11 +40,19 @@ public class S3StorageTest extends StorageTest { @BeforeAll public static void beforeAll() { - s3Client = S3_MOCK.createS3ClientV2(); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3StorageTest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3StorageTest.bucket); - }); + request.bucket(S3StorageTest.bucket); + }) + .join(); + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + } + + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); } @AfterEach @@ -65,16 +77,21 @@ protected void file(String path) { protected void file(String path, String content) { s3Client.putObject( - request -> { - request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); - }, - RequestBody.fromString(content)); + request -> { + request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); + }, + AsyncRequestBody.fromString(content)) + .join(); } protected String readFile(String path) { - try (var content = s3Client.getObject(request -> { - 
request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); - })) { + try (var content = S3ITestHelper.resolveClient(s3Client) + .getObject( + request -> { + request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); + }, + AsyncResponseTransformer.toBlockingInputStream()) + .join()) { return new String(content.readAllBytes()); } catch (IOException e) { throw new UncheckedIOException(e); @@ -86,6 +103,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt new file mode 100644 index 00000000..e70fee9a --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt @@ -0,0 +1 @@ +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt new file mode 100644 index 00000000..8bb6cc73 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt @@ -0,0 +1 @@ +1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt new file mode 100644 index 00000000..e5a9d5e2 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt @@ -0,0 +1 @@ +2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 \ No newline at end of 
file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt new file mode 100644 index 00000000..11db4a97 --- 
/dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt new file mode 100644 index 00000000..1048ef56 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt @@ -0,0 +1 @@ 
+newnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnew \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt new file mode 100644 index 00000000..588846e7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt @@ -0,0 +1 @@ +testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt new file mode 100644 index 00000000..1048ef56 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt @@ -0,0 +1 @@ +newnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnew \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension 
Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. + +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. The ID of +an object may only be found within its `inventory.json`. 
+ +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name**: `tupleSize` + * **Description:** Indicates the segment size (in characters) to split the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. 
+ +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. + +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. 
Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgoirthm`, smaller `tupleSize`, and a larger +`numberOfTuples`. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json new file mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" 
: { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 new file mode 100644 index 00000000..663554bf --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 @@ -0,0 +1 @@ +Test file 1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json new file 
mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json new file 
mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. 
OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. + +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. 
+ +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". 
+ +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. Substantial revisions of content beyond simple fixes warrants publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + +An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered +via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + +### Headers + +Extension definitions MUST contain a header section that defines the following fields: + +* **Extension Name**: The extension's unique *Registered Name* +* **Authors**: The names of the individuals who authored the extension +* **Minimum OCFL Version**: The minimum OCFL version that the extension requires, eg. 
*1.0* +* **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, eg. *1.0* +* **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* +* **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + +### Parameters + +Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + +* **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of parameter. If no default is specified, then the parameter is mandatory. + +### Body + +Each specification MUST thoroughly document how it is intended to be implemented and used, including detailed examples is helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + +## Review / Merge Policy + +1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document +1. 
Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) +1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author +1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author +1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations +1. After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page entry (`docs/index.md`). 
\ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt new file mode 100644 index 00000000..588846e7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt @@ -0,0 +1 @@ +testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt new file mode 100644 index 00000000..1048ef56 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt @@ -0,0 +1 @@ +newnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnew \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. + +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. The ID of +an object may only be found within its `inventory.json`. 
+ +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name:** `tupleSize` + * **Description:** Indicates the segment size (in characters) that the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. 
+ +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. + +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. 
Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgorithm`, smaller `tupleSize`, and a larger +`numberOfTuples`. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json new file mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" 
: { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 new file mode 100644 index 00000000..663554bf --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 @@ -0,0 +1 @@ +Test file 1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json new file 
mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json new file 
mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. 
OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. + +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. 
+ +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". 
+ +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. Substantial revisions of content beyond simple fixes warrant publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + +An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered +via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + +### Headers + +Extension definitions MUST contain a header section that defines the following fields: + +* **Extension Name**: The extension's unique *Registered Name* +* **Authors**: The names of the individuals who authored the extension +* **Minimum OCFL Version**: The minimum OCFL version that the extension requires, eg. 
*1.0* +* **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, eg. *1.0* +* **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* +* **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + +### Parameters + +Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + +* **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of the parameter. If no default is specified, then the parameter is mandatory. + +### Body + +Each specification MUST thoroughly document how it is intended to be implemented and used, including detailed examples where helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + +## Review / Merge Policy + +1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document +1. 
Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) +1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author +1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author +1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations +1. After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page entry (`docs/index.md`). 
\ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. 
+ +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. The ID of +an object may only be found within its `inventory.json`. + +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name:** `tupleSize` + * **Description:** Indicates the segment size (in characters) that the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. 
The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. + +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. 
+ +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] 
+└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgoirthm`, smaller `tupleSize`, and a larger +`numberOfTuples`. + +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json new file mode 100644 index 00000000..b7673122 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json @@ -0,0 +1,67 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v4", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "v4/content/file5" ], + 
"70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v3" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + 
"9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v4" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "file5" ], + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 new file mode 100644 index 00000000..94f4c1ab --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 @@ -0,0 +1 @@ +30a3924608f567a5d0b1f65f54946bef2a89c94f3a7affaaced6019fe348dc8363938f432a0ebd3e6227489ec16aff2255446f52de3756c18b89d5a9c15bf18c inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 new file mode 100644 index 00000000..49351eb5 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 @@ -0,0 +1 @@ +File 1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json new file mode 100644 index 00000000..e0f738ff --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 new file mode 100644 index 00000000..437fe9a9 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 @@ -0,0 +1 @@ +53876869be2d544f58e25262264b1e7246121db66458e1698ccbb46610392650434457e46ef6db0e19b8dc764c420ec3bbb8f50a10482c7696347c2ca5c20e32 inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 new file mode 100644 index 00000000..9fbb45ed --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 @@ -0,0 +1 @@ +File 2 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 new file mode 100644 index 00000000..7b648e9c --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 @@ -0,0 +1 @@ +File 3 \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json new file mode 100644 index 00000000..3e9b5a8e --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json @@ -0,0 +1,39 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v2", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + 
"79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 new file mode 100644 index 00000000..c7d12058 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 @@ -0,0 +1 @@ +4cc9f9c9e393ee6ddee579970ba6db0a5bf69f65f88a875e4f73189072d90a6e2d3d3d3672fc7bca4d81369c5a1d96837713c1d89f398b78c8a117412925720c inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/content/.gitkeep b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/content/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json new file mode 100644 index 00000000..0e78e561 --- /dev/null +++ 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json @@ -0,0 +1,52 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v3", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v3" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + 
"70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 new file mode 100644 index 00000000..a3fafdc7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 @@ -0,0 +1 @@ +6b85187577372f8eb1377b6db79df82eecf965bf7175c26355278341c5766556a2af7d9b1f35db4ffdd33826352e4f76cfbd8c77342af1ab1d68d804c9821857 inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 new file mode 100644 index 00000000..a205b376 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 @@ -0,0 +1 @@ +6543210 \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json new file mode 100644 index 00000000..b7673122 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json @@ -0,0 +1,67 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v4", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "v4/content/file5" ], + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + 
"70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v3" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v4" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "file5" ], + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 new file mode 100644 index 00000000..94f4c1ab --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 @@ -0,0 +1 @@ +30a3924608f567a5d0b1f65f54946bef2a89c94f3a7affaaced6019fe348dc8363938f432a0ebd3e6227489ec16aff2255446f52de3756c18b89d5a9c15bf18c inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json new file mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). 
Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. + +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. 
An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. + +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. 
It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". + +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. 
Substantial revisions of content beyond simple fixes warrant publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + + An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered + via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + + ### Headers + + Extension definitions MUST contain a header section that defines the following fields: + + * **Extension Name**: The extension's unique *Registered Name* + * **Authors**: The names of the individuals who authored the extension + * **Minimum OCFL Version**: The minimum OCFL version that the extension requires, e.g. *1.0* + * **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, e.g. *1.0* + * **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* + * **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + + ### Parameters + + Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + + * **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. 
Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of the parameter. If no default is specified, then the parameter is mandatory. + + ### Body + + Each specification MUST thoroughly document how it is intended to be implemented and used; including detailed examples is helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + + ## Review / Merge Policy + + 1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document + 1. Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) + 1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author + 1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author + 1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations + 1. 
After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page (`docs/index.md`). \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." 
+} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt new file mode 100644 index 00000000..588846e7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt @@ -0,0 +1 @@ +testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest \ No newline at end of file diff --git a/pom.xml b/pom.xml index 2542e307..9607cb52 100644 --- a/pom.xml +++ b/pom.xml @@ -340,10 +340,15 @@ software.amazon.awssdk bom - 2.24.13 + 2.25.13 pom import + + software.amazon.awssdk.crt + aws-crt + 0.29.12 + @@ -392,6 +397,11 @@ 2.17.0 + + org.hdrhistogram + HdrHistogram + 2.1.12 +