diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py index 084d3c3b..d68d822c 100644 --- a/WorkbenchConfig.py +++ b/WorkbenchConfig.py @@ -82,6 +82,13 @@ def get_config(self): config["temp_dir"], "csv_id_to_node_id_map.db" ) + if "page_files_source_dir_field" in user_mods: + config["page_files_source_dir_field"] = user_mods[ + "page_files_source_dir_field" + ] + else: + config["page_files_source_dir_field"] = config["id_field"] + config["config_file"] = self.args.config return config diff --git a/tests/assets/create_paged_content_from_directories_test/books_page_files_source_dir_field.yml b/tests/assets/create_paged_content_from_directories_test/books_page_files_source_dir_field.yml new file mode 100644 index 00000000..f2a8cd17 --- /dev/null +++ b/tests/assets/create_paged_content_from_directories_test/books_page_files_source_dir_field.yml @@ -0,0 +1,11 @@ +paged_content_from_directories: true +paged_content_page_model_tid: http://id.loc.gov/ontologies/bibframe/part +task: create +host: https://islandora.traefik.me +username: admin +password: password +input_dir: tests/assets/create_paged_content_from_directories_test/samplebooks +input_csv: metadata_page_files_source_dir_field.csv +standalone_media_url: true +secure_ssl_only: false +page_files_source_dir_field: directory diff --git a/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata.csv b/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata.csv index d1eb1eb3..57096b1b 100644 --- a/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata.csv +++ b/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata.csv @@ -1,2 +1,2 @@ id,title,field_model -book1,Using Islandora Workbench for Fun and Profit,Digital Document +book1,Using Islandora Workbench for Fun and Profit,Paged Content diff --git a/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata_page_files_source_dir_field.csv 
b/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata_page_files_source_dir_field.csv new file mode 100644 index 00000000..87e00128 --- /dev/null +++ b/tests/assets/create_paged_content_from_directories_test/samplebooks/metadata_page_files_source_dir_field.csv @@ -0,0 +1,2 @@ +id,title,field_model,directory +id:book1,Using Islandora Workbench for Fun and Profit,Paged Content,book1 diff --git a/tests/islandora_tests_paged_content.py b/tests/islandora_tests_paged_content.py index 03aa8291..9822492a 100644 --- a/tests/islandora_tests_paged_content.py +++ b/tests/islandora_tests_paged_content.py @@ -209,5 +209,116 @@ def tearDown(self): os.remove(rollback_file_path) +class TestCreatePagedContentFromDirectoriesPageFilesSourceDirField(unittest.TestCase): + + def setUp(self): + self.current_dir = os.path.dirname(os.path.abspath(__file__)) + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "create_paged_content_from_directories_test", + "books_page_files_source_dir_field.yml", + ) + + yaml = YAML() + with open(self.create_config_file_path, "r") as f: + config_file_contents = f.read() + config_data = yaml.load(config_file_contents) + config = {} + for k, v in config_data.items(): + config[k] = v + self.islandora_host = config["host"] + self.islandora_username = config["username"] + self.islandora_password = config["password"] + + self.create_cmd = ["./workbench", "--config", self.create_config_file_path] + + self.temp_dir = tempfile.gettempdir() + + def test_create_paged_content_from_directories(self): + requests.packages.urllib3.disable_warnings() + self.nids = list() + create_output = subprocess.check_output(self.create_cmd) + create_output = create_output.decode().strip() + + # Collect the node IDs of the nodes created during this test in self.nids + # so they can be deleted in tearDown(). 
+ create_lines = create_output.splitlines() + for line in create_lines: + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") + # E.g. a URL alias. + if workbench_utils.value_is_numeric(nid) is False: + url = line[line.find("http") :].strip(".") + nid = workbench_utils.get_nid_from_url_without_config(url) + self.nids.append(nid) + + self.assertEqual(len(self.nids), 4) + + # Test a page object's 'field_member_of' value to see if it matches its + # parent's node ID. In this test, we check every page. Note: the + # metadata CSV file used to create the paged content and page objects + # uses hard-coded term IDs from the Islandora Models taxonomy as used + # in the Islandora Playbook. If they change or are different in the + # Islandora this test is running against, this test will fail. Also note + # that this test creates media and does not delete them. + parent_node_id_to_test = self.nids[0] + # Get the REST feed for the parent node's members. + members_url = ( + self.islandora_host + + "/node/" + + parent_node_id_to_test + + "/members?_format=json" + ) + # Need to provide credentials for this REST export. + members_response = requests.get( + members_url, + auth=(self.islandora_username, self.islandora_password), + verify=False, + ) + members = json.loads(members_response.text) + + expected_member_weights = [1, 2, 3] + retrieved_member_weights = list() + for member in members: + retrieved_member_weights.append(int(member["field_weight"][0]["value"])) + # Test that each page is indeed a member of the first node created during this test. + self.assertEqual( + int(parent_node_id_to_test), + int(member["field_member_of"][0]["target_id"]), + ) + + # Test that the weights assigned to the three pages are what we expect. 
+ self.assertEqual(expected_member_weights, retrieved_member_weights) + + def tearDown(self): + for nid in self.nids: + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + f"{self.islandora_host}/node/{nid}", + ] + quick_delete_output = subprocess.check_output(quick_delete_cmd) + + preprocessed_csv_path = os.path.join( + self.temp_dir, "metadata_page_files_source_dir_field.csv.preprocessed" + ) + if os.path.exists(preprocessed_csv_path): + os.remove(preprocessed_csv_path) + + rollback_file_path = os.path.join( + self.current_dir, + "assets", + "create_paged_content_from_directories_test", + "samplebooks", + "rollback.csv", + ) + if os.path.exists(rollback_file_path): + os.remove(rollback_file_path) + + if __name__ == "__main__": unittest.main() diff --git a/workbench b/workbench index 4a5ad589..562509be 100755 --- a/workbench +++ b/workbench @@ -282,6 +282,10 @@ def create(): if custom_field == "checksum": continue + # 'directory' is a reserved CSV field. + if custom_field == "directory": + continue + # We skip CSV columns whose headers use the 'media:video:field_foo' media track convention. if custom_field.startswith("media:"): continue diff --git a/workbench_utils.py b/workbench_utils.py index c6913828..dc5cc777 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -1816,6 +1816,7 @@ def check_input(config, args): # but it doesn't show up in any field configs. 
reserved_fields = [ "file", + "directory", "media_use_tid", "checksum", "node_id", @@ -3601,7 +3602,8 @@ def check_input(config, args): paged_content_from_directories_csv_data, start=1 ): dir_path = os.path.join( - config["input_dir"], file_check_row[config["id_field"]] + config["input_dir"], + file_check_row[config["page_files_source_dir_field"]], ) if not os.path.exists(dir_path) or os.path.isfile(dir_path): message = ( @@ -5783,7 +5785,6 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None): row_num = 0 unique_identifiers = [] - # WIP on #812. # Prepare any "csv_row_filters", which we apply to each row, below. if "csv_row_filters" in config and len(config["csv_row_filters"]) > 0: row_filters_is = dict() @@ -8545,7 +8546,8 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id): # weight assigned to the page is the last segment in the filename, split from the rest of the filename using the # character defined in the 'paged_content_sequence_separator' config option. parent_id = parent_csv_record[config["id_field"]] - page_dir_path = os.path.join(config["input_dir"], str(parent_id).strip()) + page_dir_name = parent_csv_record[config["page_files_source_dir_field"]] + page_dir_path = os.path.join(config["input_dir"], page_dir_name) if "paged_content_additional_page_media" in config: if "paged_content_image_file_extension" in config: @@ -8771,7 +8773,7 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id): config, parent_id, parent_node_id, page_file_name, node_nid ) - page_file_path = os.path.join(parent_id, page_file_name) + page_file_path = os.path.join(page_dir_name, page_file_name) fake_csv_record = collections.OrderedDict() fake_csv_record["title"] = page_title fake_csv_record["file"] = page_file_path