Merge branch 'issue_817'

mjordan · Aug 23, 2024 · 2e0f7f5 · 2e0f7f5
2 parents 132062b + 3c14eac
commit 2e0f7f5
Show file tree

Hide file tree

Showing 7 changed files with 142 additions and 5 deletions.
diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py
@@ -82,6 +82,13 @@ def get_config(self):
                 config["temp_dir"], "csv_id_to_node_id_map.db"
             )
 
+        if "page_files_source_dir_field" in user_mods:
+            config["page_files_source_dir_field"] = user_mods[
+                "page_files_source_dir_field"
+            ]
+        else:
+            config["page_files_source_dir_field"] = config["id_field"]
+
         config["config_file"] = self.args.config
 
         return config

diff --git a/...s/assets/create_paged_content_from_directories_test/books_page_files_source_dir_field.yml b/...s/assets/create_paged_content_from_directories_test/books_page_files_source_dir_field.yml
@@ -0,0 +1,11 @@
+paged_content_from_directories: true
+paged_content_page_model_tid: http://id.loc.gov/ontologies/bibframe/part
+task: create
+host: https://islandora.traefik.me
+username: admin
+password: password
+input_dir: tests/assets/create_paged_content_from_directories_test/samplebooks
+input_csv: metadata_page_files_source_dir_field.csv
+standalone_media_url: true
+secure_ssl_only: false
+page_files_source_dir_field: directory
diff --git a/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata.csv b/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata.csv
@@ -1,2 +1,2 @@
 id,title,field_model
-book1,Using Islandora Workbench for Fun and Profit,Digital Document
+book1,Using Islandora Workbench for Fun and Profit,Paged Content
diff --git a/..._paged_content_from_directories_test/samplebooks/metadata_page_files_source_dir_field.csv b/..._paged_content_from_directories_test/samplebooks/metadata_page_files_source_dir_field.csv
@@ -0,0 +1,2 @@
+id,title,field_model,directory
+id:book1,Using Islandora Workbench for Fun and Profit,Paged Content,book1
diff --git a/tests/islandora_tests_paged_content.py b/tests/islandora_tests_paged_content.py
@@ -209,5 +209,116 @@ def tearDown(self):
             os.remove(rollback_file_path)
 
 
+class TestCreatePagedContentFromDirectoriesPageFilesSourceDirField(unittest.TestCase):
+
+    def setUp(self):
+        self.current_dir = os.path.dirname(os.path.abspath(__file__))
+        self.create_config_file_path = os.path.join(
+            self.current_dir,
+            "assets",
+            "create_paged_content_from_directories_test",
+            "books_page_files_source_dir_field.yml",
+        )
+
+        yaml = YAML()
+        with open(self.create_config_file_path, "r") as f:
+            config_file_contents = f.read()
+        config_data = yaml.load(config_file_contents)
+        config = {}
+        for k, v in config_data.items():
+            config[k] = v
+        self.islandora_host = config["host"]
+        self.islandora_username = config["username"]
+        self.islandora_password = config["password"]
+
+        self.create_cmd = ["./workbench", "--config", self.create_config_file_path]
+
+        self.temp_dir = tempfile.gettempdir()
+
+    def test_create_paged_content_from_directories(self):
+        requests.packages.urllib3.disable_warnings()
+        self.nids = list()
+        create_output = subprocess.check_output(self.create_cmd)
+        create_output = create_output.decode().strip()
+
+        # Collect in self.nids the node IDs of the nodes created during this test
+        # (parsed from Workbench's output) so they can be deleted in tearDown().
+        create_lines = create_output.splitlines()
+        for line in create_lines:
+            if "created at" in line:
+                nid = line.rsplit("/", 1)[-1]
+                nid = nid.strip(".")
+                # E.g. a URL alias.
+                if workbench_utils.value_is_numeric(nid) is False:
+                    url = line[line.find("http") :].strip(".")
+                    nid = workbench_utils.get_nid_from_url_without_config(url)
+                self.nids.append(nid)
+
+        self.assertEqual(len(self.nids), 4)
+
+        # Test each page object's 'field_member_of' value to see if it matches its
+        # parent's node ID. In this test, we check every member page. Note: the
+        # metadata CSV file used to create the paged content and page objects
+        # uses hard-coded term IDs from the Islandora Models taxonomy as used
+        # in the Islandora Playbook. If they change or are different in the
+        # Islandora this test is running against, this test will fail. Also note
+        # that this test creates media and does not delete them.
+        parent_node_id_to_test = self.nids[0]
+        # Get the REST feed for the parent node's members.
+        members_url = (
+            self.islandora_host
+            + "/node/"
+            + parent_node_id_to_test
+            + "/members?_format=json"
+        )
+        # Need to provide credentials for this REST export.
+        members_response = requests.get(
+            members_url,
+            auth=(self.islandora_username, self.islandora_password),
+            verify=False,
+        )
+        members = json.loads(members_response.text)
+
+        expected_member_weights = [1, 2, 3]
+        retrieved_member_weights = list()
+        for member in members:
+            retrieved_member_weights.append(int(member["field_weight"][0]["value"]))
+            # Test that each page is indeed a member of the first node created during this test.
+            self.assertEqual(
+                int(parent_node_id_to_test),
+                int(member["field_member_of"][0]["target_id"]),
+            )
+
+        # Test that the weights assigned to the three pages are what we expect.
+        self.assertEqual(expected_member_weights, retrieved_member_weights)
+
+    def tearDown(self):
+        for nid in self.nids:
+            quick_delete_cmd = [
+                "./workbench",
+                "--config",
+                self.create_config_file_path,
+                "--quick_delete_node",
+                f"{self.islandora_host}/node/{nid}",
+            ]
+            quick_delete_output = subprocess.check_output(quick_delete_cmd)
+
+        preprocessed_csv_path = os.path.join(
+            self.temp_dir, "metadata_page_files_source_dir_field.csv.preprocessed"
+        )
+        if os.path.exists(preprocessed_csv_path):
+            os.remove(preprocessed_csv_path)
+
+        rollback_file_path = os.path.join(
+            self.current_dir,
+            "assets",
+            "create_paged_content_from_directories_test",
+            "samplebooks",
+            "rollback.csv",
+        )
+        if os.path.exists(rollback_file_path):
+            os.remove(rollback_file_path)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/workbench b/workbench
@@ -282,6 +282,10 @@ def create():
             if custom_field == "checksum":
                 continue
 
+            # 'directory' is a reserved CSV field.
+            if custom_field == "directory":
+                continue
+
             # We skip CSV columns whose headers use the 'media:video:field_foo' media track convention.
             if custom_field.startswith("media:"):
                 continue

diff --git a/workbench_utils.py b/workbench_utils.py
@@ -1816,6 +1816,7 @@ def check_input(config, args):
     # but it doesn't show up in any field configs.
     reserved_fields = [
         "file",
+        "directory",
         "media_use_tid",
         "checksum",
         "node_id",
@@ -3601,7 +3602,8 @@ def check_input(config, args):
             paged_content_from_directories_csv_data, start=1
         ):
             dir_path = os.path.join(
-                config["input_dir"], file_check_row[config["id_field"]]
+                config["input_dir"],
+                file_check_row[config["page_files_source_dir_field"]],
             )
             if not os.path.exists(dir_path) or os.path.isfile(dir_path):
                 message = (
@@ -5783,7 +5785,6 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None):
         row_num = 0
         unique_identifiers = []
 
-        # WIP on #812.
         # Prepare any "csv_row_filters", which we apply to each row, below.
         if "csv_row_filters" in config and len(config["csv_row_filters"]) > 0:
             row_filters_is = dict()
@@ -8545,7 +8546,8 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id):
     # weight assigned to the page is the last segment in the filename, split from the rest of the filename using the
     # character defined in the 'paged_content_sequence_separator' config option.
     parent_id = parent_csv_record[config["id_field"]]
-    page_dir_path = os.path.join(config["input_dir"], str(parent_id).strip())
+    page_dir_name = parent_csv_record[config["page_files_source_dir_field"]]
+    page_dir_path = os.path.join(config["input_dir"], page_dir_name)
 
     if "paged_content_additional_page_media" in config:
         if "paged_content_image_file_extension" in config:
@@ -8771,7 +8773,7 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id):
                 config, parent_id, parent_node_id, page_file_name, node_nid
             )
 
-            page_file_path = os.path.join(parent_id, page_file_name)
+            page_file_path = os.path.join(page_dir_name, page_file_name)
             fake_csv_record = collections.OrderedDict()
             fake_csv_record["title"] = page_title
             fake_csv_record["file"] = page_file_path
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		id,title,field_model,directory
		id:book1,Using Islandora Workbench for Fun and Profit,Paged Content,book1