Merge branch 'main' into issue_757

mjordan · Apr 16, 2024 · 7b5ef9b · 7b5ef9b
2 parents cabb260 + 19ffa9c
commit 7b5ef9b
Show file tree

Hide file tree

Showing 5 changed files with 295 additions and 50 deletions.
diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py
@@ -93,7 +93,11 @@ def get_user_config(self):
             try:
                 loaded = yaml.load(stream)
             except YAMLError as exc:
-                print(exc)
+                print(
+                    f"There appears to be a YAML syntax error in your configuration file, {self.args.config}. Remove the username and\npassword, and run the file through https://codebeautify.org/yaml-validator/ or your YAML validator of choice."
+                )
+                sys.exit()
+
         # 'media_file_fields' has been replaced with 'media_fields' and 'media_type_file_fields'.
         # This is aliasing code that can be removed at some point in the future.
         if "media_file_fields" in loaded:
@@ -204,6 +208,7 @@ def get_default_config(self):
             "log_response_status_code": False,
             "log_headers": False,
             "log_term_creation": True,
+            "log_file_name_and_line_number": False,
             "progress_bar": False,
             "user_agent": "Islandora Workbench",
             "allow_redirects": True,

diff --git a/tests/assets/check_test/fail_to_connect.yml b/tests/assets/check_test/fail_to_connect.yml
@@ -0,0 +1,6 @@
+task: create
+host: https://somebadhost.org
+username: admin
+password: password
+nodes_only: true
+secure_ssl_only: false
diff --git a/tests/islandora_tests_check.py b/tests/islandora_tests_check.py
@@ -20,6 +20,24 @@
 import workbench_utils
 
 
+class TestFailToConnect(unittest.TestCase):  # Runs "workbench --check" against a config whose "host" is https://somebadhost.org.
+
+    def test_failed_to_connect(self):
+        self.current_dir = os.path.dirname(os.path.abspath(__file__))
+        config_file_path = os.path.join(
+            self.current_dir, "assets", "check_test", "fail_to_connect.yml"
+        )
+        cmd = ["./workbench", "--config", config_file_path, "--check"]  # "./workbench" is resolved against the current working directory.
+        try:
+            output = subprocess.check_output(cmd)
+            output = output.decode().strip()  # Captured stdout, decoded and trimmed before matching.
+            self.assertRegex(
+                output, "Workbench can't connect to https://somebadhost.org", ""
+            )
+        except subprocess.CalledProcessError as err:
+            pass  # NOTE(review): a non-zero exit status skips the assertion above, so the test passes without checking the message.
+
+
 class TestCreateCheck(unittest.TestCase):
 
     def setUp(self):

diff --git a/workbench b/workbench
@@ -15,6 +15,7 @@ import collections
 import subprocess
 import requests_cache
 from progress_bar import InitBar
+from ruamel.yaml import YAML
 from workbench_utils import *
 import workbench_fields
 from WorkbenchConfig import WorkbenchConfig
@@ -105,6 +106,17 @@ def create():
 
     row_count = 0
     for row in csv_data:
+        if (
+            "node_exists_verification_view_endpoint" in config
+            and get_node_exists_verification_view_endpoint(config) is not False
+        ):
+            candidate_node_id = verify_node_exists_by_key(config, copy.copy(row))
+            if candidate_node_id is not False:
+                message = f"Item in row {row[config['id_field']]} appears to already be in Drupal ({config['host']}/node/{candidate_node_id}), skipping it."
+                logging.warning(message)
+                print(message)
+                continue
+
         # Delete expired items from request_cache before processing a row.
         if config["enable_http_cache"] is True:
             requests_cache.delete(expired=True)
@@ -415,13 +427,14 @@ def create():
         # Execute node-specific post-create scripts, if any are configured.
         if "node_post_create" in config and len(config["node_post_create"]) > 0:
             for command in config["node_post_create"]:
-                post_task_output, post_task_return_code = (
-                    execute_entity_post_task_script(
-                        command,
-                        args.config,
-                        node_response.status_code,
-                        node_response.text,
-                    )
+                (
+                    post_task_output,
+                    post_task_return_code,
+                ) = execute_entity_post_task_script(
+                    command,
+                    args.config,
+                    node_response.status_code,
+                    node_response.text,
                 )
                 if post_task_return_code == 0:
                     logging.info(
@@ -873,13 +886,14 @@ def update():
             # Execute node-specific post-update scripts, if any are configured.
             if "node_post_update" in config and len(config["node_post_update"]) > 0:
                 for command in config["node_post_update"]:
-                    post_task_output, post_task_return_code = (
-                        execute_entity_post_task_script(
-                            command,
-                            args.config,
-                            node_response.status_code,
-                            node_response.text,
-                        )
+                    (
+                        post_task_output,
+                        post_task_return_code,
+                    ) = execute_entity_post_task_script(
+                        command,
+                        args.config,
+                        node_response.status_code,
+                        node_response.text,
                     )
                     if post_task_return_code == 0:
                         logging.info(
@@ -1724,9 +1738,12 @@ def update_media() -> None:
                     )
                     invalid_track_file = True
                     break
-                track_label, track_type, track_language, track_filepath = (
-                    track_file.split(":")
-                )
+                (
+                    track_label,
+                    track_type,
+                    track_language,
+                    track_filepath,
+                ) = track_file.split(":")
                 track_files_info["track_labels"].append(track_label)
                 track_files_info["track_types"].append(track_type)
                 track_files_info["track_languages"].append(track_language)
@@ -2304,13 +2321,14 @@ def create_from_files():
             # Execute node-specific post-create scripts, if any are configured.
             if "node_post_create" in config and len(config["node_post_create"]) > 0:
                 for command in config["node_post_create"]:
-                    post_task_output, post_task_return_code = (
-                        execute_entity_post_task_script(
-                            command,
-                            args.config,
-                            node_response.status_code,
-                            node_response.text,
-                        )
+                    (
+                        post_task_output,
+                        post_task_return_code,
+                    ) = execute_entity_post_task_script(
+                        command,
+                        args.config,
+                        node_response.status_code,
+                        node_response.text,
                     )
                     if post_task_return_code == 0:
                         logging.info(
@@ -2670,13 +2688,14 @@ def get_data_from_view():
                 # Execute node-specific post-export scripts, if any are configured.
                 if "node_post_export" in config and len(config["node_post_export"]) > 0:
                     for command in config["node_post_export"]:
-                        post_task_output, post_task_return_code = (
-                            execute_entity_post_task_script(
-                                command,
-                                args.config,
-                                response.status_code,
-                                json.dumps(node),
-                            )
+                        (
+                            post_task_output,
+                            post_task_return_code,
+                        ) = execute_entity_post_task_script(
+                            command,
+                            args.config,
+                            response.status_code,
+                            json.dumps(node),
                         )
                         if post_task_return_code == 0:
                             logging.info(
@@ -2748,13 +2767,14 @@ def get_data_from_view():
                         and len(config["node_post_export"]) > 0
                     ):
                         for command in config["node_post_export"]:
-                            post_task_output, post_task_return_code = (
-                                execute_entity_post_task_script(
-                                    command,
-                                    args.config,
-                                    response.status_code,
-                                    json.dumps(node),
-                                )
+                            (
+                                post_task_output,
+                                post_task_return_code,
+                            ) = execute_entity_post_task_script(
+                                command,
+                                args.config,
+                                response.status_code,
+                                json.dumps(node),
                             )
                             if post_task_return_code == 0:
                                 logging.info(
@@ -3303,7 +3323,18 @@ parser.add_argument(
 parser.add_argument("--version", action="version", version="Islandora Workbench 0.0.0")
 args = parser.parse_args()
 
-workbench_config = WorkbenchConfig(args)
+try:
+    workbench_config = WorkbenchConfig(args)
+except Exception as e:
+    # Workbench wouldn't get this far if the YAML config file has syntax errors or is not found.
+    yaml = YAML()
+    with open(args.config, "r") as stream:
+        config_to_get_host_value = yaml.load(stream)
+    print(f"Oops, Workbench can't connect to {config_to_get_host_value['host']}.")
+    print(
+        'Confirm your "host" configuration setting is correct and that the website is running, and try again.'
+    )
+    sys.exit()
 config = workbench_config.get_config()
 
 create_temp_dir(config)
@@ -3323,13 +3354,22 @@ if config["secondary_tasks"] is not None and len(config["secondary_tasks"]) > 0:
 for handler in logging.root.handlers[:]:
     logging.root.removeHandler(handler)
 
-logging.basicConfig(
-    filename=config["log_file_path"],
-    level=logging.INFO,
-    filemode=config["log_file_mode"],
-    format="%(asctime)s - %(levelname)s - %(message)s",
-    datefmt="%d-%b-%y %H:%M:%S",
-)
+if config["log_file_name_and_line_number"] is True:
+    logging.basicConfig(
+        filename=config["log_file_path"],
+        level=logging.INFO,
+        filemode=config["log_file_mode"],
+        format="%(asctime)s - %(levelname)s - %(filename)s - %(lineno)d - %(message)s",
+        datefmt="%d-%b-%y %H:%M:%S",
+    )
+else:
+    logging.basicConfig(
+        filename=config["log_file_path"],
+        level=logging.INFO,
+        filemode=config["log_file_mode"],
+        format="%(asctime)s - %(levelname)s - %(message)s",
+        datefmt="%d-%b-%y %H:%M:%S",
+    )
 
 if "check" in config.keys():
     tasks_to_skip = ["create_from_files", "get_data_from_view"]