From 07a74f67498e6266ab4b4a6efed50004249c775f Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Wed, 28 Feb 2024 20:24:51 -0600 Subject: [PATCH] Fix bug staging identical files to mutable head A mutable head revision that only contains a file with identical contents to a file already in the mutable head is now properly written to the object. Resolves #105 --- .../ocfl/core/storage/DefaultOcflStorage.java | 13 +- .../java/io/ocfl/itest/MutableHeadITest.java | 17 + .../0004-hashed-n-tuple-storage-layout.md | 303 ++++++++++++++++++ .../expected/repos/mutable10/0=ocfl_1.1 | 1 + .../0=ocfl_object_1.1 | 1 + .../head/content/r1/dir1/file3 | 1 + .../0005-mutable-head/head/inventory.json | 33 ++ .../head/inventory.json.sha512 | 1 + .../extensions/0005-mutable-head/revisions/r2 | 1 + .../root-inventory.json.sha512 | 1 + .../inventory.json | 20 ++ .../inventory.json.sha512 | 1 + .../v1/inventory.json | 20 ++ .../v1/inventory.json.sha512 | 1 + .../config.json | 7 + .../repos/mutable10/ocfl_extensions_1.0.md | 118 +++++++ .../expected/repos/mutable10/ocfl_layout.json | 4 + pom.xml | 4 +- 18 files changed, 540 insertions(+), 7 deletions(-) create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/0004-hashed-n-tuple-storage-layout.md create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/0=ocfl_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/content/r1/dir1/file3 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json create mode 100644 
ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/revisions/r2 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/root-inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/extensions/0004-hashed-n-tuple-storage-layout/config.json create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_extensions_1.0.md create mode 100644 ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_layout.json diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java index 075cffe4..106a4192 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java @@ 
-751,13 +751,16 @@ private void storeNewMutableHeadVersion(Inventory inventory, Path stagingDir) { private void moveToRevisionDirectory( Inventory inventory, ObjectPaths.ObjectRoot objectRoot, Path stagingDir, String destination) { + var revisionStagingDir = stagingDir + .resolve(inventory.resolveContentDirectory()) + .resolve(inventory.getRevisionNum().toString()); + if (Files.notExists(revisionStagingDir)) { + // If the directory doesn't exist, then it means there were no new files added and nothing to do + return; + } storage.createDirectories(objectRoot.headVersion().contentPath()); try { - storage.moveDirectoryInto( - stagingDir - .resolve(inventory.resolveContentDirectory()) - .resolve(inventory.getRevisionNum().toString()), - destination); + storage.moveDirectoryInto(revisionStagingDir, destination); } catch (OcflFileAlreadyExistsException e) { throw new ObjectOutOfSyncException(String.format( "Failed to update mutable HEAD of object %s. Changes are out of sync with the current object state.", diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java index 5caba7d5..7a361ffa 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java @@ -617,6 +617,23 @@ public void cleanupOldRevisionMarkers() { verifyRepo(repoName); } + @Test + public void stageIdenticalFileToFileAlreadyStaged() { + var repoName = "mutable10"; + var repo = defaultRepo(repoName); + + var objectId = "o1"; + + repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("stage 1"), updater -> { + updater.writeFile(new ByteArrayInputStream("file3".getBytes()), "dir1/file3"); + }); + repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("stage 1"), updater -> { + updater.writeFile(new ByteArrayInputStream("file3".getBytes()), "file4"); + }); + + verifyRepo(repoName); + } + 
private Path outputPath(String repoName, String path) { try { var output = outputDir.resolve(Paths.get(repoName, path)); diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/0004-hashed-n-tuple-storage-layout.md b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. + +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. 
The ID of +an object may only be found within its `inventory.json`. + +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name**: `tupleSize` + * **Description:** Indicates the segment size (in characters) to split the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. 
+ +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. + +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. 
Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgoirthm`, smaller `tupleSize`, and a larger +`numberOfTuples`. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/content/r1/dir1/file3 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/content/r1/dir1/file3 new file mode 100644 index 00000000..873fb8d6 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/content/r1/dir1/file3 @@ -0,0 +1 @@ +file3 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json new 
file mode 100644 index 00000000..57a0b6c3 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json @@ -0,0 +1,33 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v2", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "b10ff867df18165a0e100d99cd3d27f845f7ef9ad84eeb627a53aabaea04805940c3693154b8a32541a31887dda9fb1e667e93307473b1c581021714768bd032" : [ "extensions/0005-mutable-head/head/content/r1/dir1/file3" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "Auto-generated empty object version.", + "user" : { + "name" : "ocfl-java", + "address" : "https://github.com/OCFL/ocfl-java" + }, + "state" : { } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "stage 1", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "b10ff867df18165a0e100d99cd3d27f845f7ef9ad84eeb627a53aabaea04805940c3693154b8a32541a31887dda9fb1e667e93307473b1c581021714768bd032" : [ "dir1/file3", "file4" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json.sha512 new file mode 100644 index 00000000..f830ddb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/head/inventory.json.sha512 @@ -0,0 +1 @@ 
+9fb7a43c422f1f8150c937afe2a4349483a001db359c2283eb0945c42dee5ab217edc41aab3316e25db13c55ddc27115891305a285d0e7bdebd17e1ebe7d53e4 inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/revisions/r2 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/revisions/r2 new file mode 100644 index 00000000..8eeebd0b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/revisions/r2 @@ -0,0 +1 @@ +r2 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/root-inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/root-inventory.json.sha512 new file mode 100644 index 00000000..a1797951 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/extensions/0005-mutable-head/root-inventory.json.sha512 @@ -0,0 +1 @@ +badf8cc54bf7ce56839be99de1e587c4e7c0dd94599875d33b901191a7b78d92393f7c9703e95d889e79c17e401bdb3984287db8bcbe242e582adefbd2d9035f inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json new file mode 100644 index 00000000..ea2e9870 --- 
/dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json @@ -0,0 +1,20 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "Auto-generated empty object version.", + "user" : { + "name" : "ocfl-java", + "address" : "https://github.com/OCFL/ocfl-java" + }, + "state" : { } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 new file mode 100644 index 00000000..a1797951 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 @@ -0,0 +1 @@ +badf8cc54bf7ce56839be99de1e587c4e7c0dd94599875d33b901191a7b78d92393f7c9703e95d889e79c17e401bdb3984287db8bcbe242e582adefbd2d9035f inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json new file mode 100644 index 00000000..ea2e9870 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json @@ -0,0 +1,20 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + 
"digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "Auto-generated empty object version.", + "user" : { + "name" : "ocfl-java", + "address" : "https://github.com/OCFL/ocfl-java" + }, + "state" : { } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 new file mode 100644 index 00000000..a1797951 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 @@ -0,0 +1 @@ +badf8cc54bf7ce56839be99de1e587c4e7c0dd94599875d33b901191a7b78d92393f7c9703e95d889e79c17e401bdb3984287db8bcbe242e582adefbd2d9035f inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/extensions/0004-hashed-n-tuple-storage-layout/config.json new file mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null 
+++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. 
+ +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. + +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. 
For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". + +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. 
New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. Substantial revisions of content beyond simple fixes warrants publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + +An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered +via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + +### Headers + +Extension definitions MUST contain a header section that defines the following fields: + +* **Extension Name**: The extension's unique *Registered Name* +* **Authors**: The names of the individuals who authored the extension +* **Minimum OCFL Version**: The minimum OCFL version that the extension requires, eg. *1.0* +* **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, eg. *1.0* +* **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* +* **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + +### Parameters + +Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + +* **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. 
One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of the parameter. If no default is specified, then the parameter is mandatory. + +### Body + +Each specification MUST thoroughly document how it is intended to be implemented and used; including detailed examples is helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + +## Review / Merge Policy + +1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document +1. Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) +1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author +1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author +1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1.
At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations +1. After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page (`docs/index.md`). \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/repos/mutable10/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 9d092f5b..0c1b303f 100644 --- a/pom.xml +++ b/pom.xml @@ -177,13 +177,13 @@ com.diffplug.spotless spotless-maven-plugin - 2.36.0 + 2.43.0 - 2.30.0 + 2.40.0