This repository has been archived by the owner on Aug 11, 2023. It is now read-only.

Improve exception handling, add logging and bug fixes #16

Merged 13 commits on Jul 6, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -26,3 +26,4 @@ build/
# Package specific
psi*.json
psi*.xlsx
psi.log
1 change: 0 additions & 1 deletion README.md
@@ -214,7 +214,6 @@ Example:

Full list of available metrics:

* `observedTotalCumulativeLayoutShift`
* `observedCumulativeLayoutShift`
* `observedLargestContentfulPaintAllFrames`
* `maxPotentialFID`
33 changes: 27 additions & 6 deletions src/pyspeedinsights/api/keys.py
@@ -7,9 +7,21 @@
If no key is set in keyring, the user will be manually prompted for their key.
"""

import logging

import keyring
from keyring.errors import KeyringError

logger = logging.getLogger(__name__)


class RetryLimitExceededError(KeyringError):
"""Exception if the user has exceeded the retry limit for entering an API key."""


class InputTerminatedError(KeyringError):
"""Exception if the user manually terminates retry for entering an API key."""


def get_api_key() -> str:
"""Gets the user's PSI API key from their keyring store.
@@ -21,29 +33,38 @@ def get_api_key() -> str:
Returns:
A str representing the PSI API key.
Raises:
SystemExit: The user terminated the reprompt.
InputTerminatedError: The user terminated the reprompt.
RetryLimitExceededError: The reprompt limit was exceeded.
"""
logger.info("Attempting to retrieve API key from keystore.")
try:
# get_password() returns None for an empty key
psi_api_key = keyring.get_password("system", "psikey")
except KeyringError:
print("There was an error retrieving your API key from the keystore.")
except KeyringError as err:
logger.error(
f"There was an error retrieving your API key from the keystore: {err}",
exc_info=True,
)
psi_api_key = None

if psi_api_key is None:
psi_api_key = input("No API key found. Enter it manually:\n")
logger.warning("No API key found. Defaulting to manual input.")
psi_api_key = input("No API key found. Enter your key manually:\n")

retry_limit = 5
while not psi_api_key:
logger.warning("Empty API key supplied. Reprompting user for key.")
reprompt = input(
"Empty API key supplied. Please re-enter your key or Q to quit:\n"
)
# Below errors logged as CRITICAL in main() before exit
if reprompt in ("Q", "q"):
raise SystemExit
raise InputTerminatedError("API key input cancelled.")
elif retry_limit < 1:
raise SystemExit("Retry limit exceeded.")
raise RetryLimitExceededError("Retry limit for entering API key exceeded.")
else:
psi_api_key = reprompt
retry_limit -= 1

logger.info("API key retrieval successful.")
return psi_api_key
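
Note: the diff's comment says the new exceptions are logged as CRITICAL in main() before exit. A minimal sketch of a caller handling them might look like the following; the run() function and its logging details are illustrative assumptions, not the package's actual entry point.

import logging
import sys

from pyspeedinsights.api.keys import (
    InputTerminatedError,
    RetryLimitExceededError,
    get_api_key,
)

logger = logging.getLogger(__name__)


def run() -> None:
    try:
        key = get_api_key()
    except (InputTerminatedError, RetryLimitExceededError) as err:
        # Both exceptions subclass KeyringError; log as CRITICAL and exit,
        # mirroring the behavior the PR comment describes for main().
        logger.critical(err, exc_info=True)
        sys.exit(1)
    logger.debug("API key retrieved (%d chars).", len(key))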
64 changes: 48 additions & 16 deletions src/pyspeedinsights/api/request.py
@@ -1,19 +1,23 @@
"""Async request preparation and processing for PSI API calls."""

import asyncio
import logging
import ssl
from collections import Counter
from typing import Any, Coroutine, Optional, Union

import aiohttp

from ..utils.generic import remove_nonetype_dict_items
from ..utils.urls import validate_url
from .keys import get_api_key
from ..utils.urls import InvalidURLError, validate_url
from .keys import KeyringError, get_api_key

logger = logging.getLogger(__name__)
next_delay = 1 # Global for applying a 1s delay between requests


async def get_response(
key: str,
url: str,
category: Optional[str] = None,
locale: Optional[str] = None,
@@ -34,6 +38,7 @@ async def get_response(
base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
url = validate_url(url)
params = {
"key": key,
"url": url,
"category": category,
"locale": locale,
@@ -44,15 +49,15 @@
}
# Use API defaults instead of passing None values as query params.
params = remove_nonetype_dict_items(params)
params["key"] = get_api_key()
req_url = params["url"]

# Add a 1s delay between calls to avoid 500 errors from server.
global next_delay
next_delay += 1
logger.debug("Sleeping request to prevent server errors.")
await asyncio.sleep(next_delay)

print(f"Sending request... ({req_url})")
logger.info(f"Sending request... ({req_url})")
# Make async call with query params to PSI API and await response.
async with aiohttp.ClientSession() as session:
async with session.get(url=base_url, params=params) as resp:
@@ -62,18 +67,18 @@
try:
resp.raise_for_status()
json_resp = await resp.json()
print(f"Request successful! ({req_url})")
logger.info(f"Request successful! ({req_url})")
except aiohttp.ClientError as err_c:
if retry_attempts < 1:
print(err_c)
print(f"Retry limit reached. Skipping ({req_url})")
logger.error(err_c, exc_info=True)
logger.warning(
f"Retry limit for URL reached. Skipping ({req_url})"
)
raise aiohttp.ClientError(err_c)
else:
retry_attempts -= 1
print(
"Request failed. Retrying... ",
f"{retry_attempts} retries left ({req_url})",
)
logger.warning("Request failed. Retrying.")
logger.info(f"{retry_attempts} retries left ({req_url})")
await asyncio.sleep(1)
return json_resp

@@ -91,15 +96,39 @@ def run_requests(

async def gather_responses(tasks: list[Coroutine]) -> list[dict]:
"""Gathers tasks and awaits the return of the responses for processing."""
print(f"Preparing {len(tasks)} URL(s)...")
logger.info(f"Gathering {len(tasks)} URL(s) and scheduling tasks.")
responses = await asyncio.gather(*tasks, return_exceptions=True)

# Generator expression for bubbling up critical exceptions (handled in main())
# Purposefully explicit here to avoid raising exceptions for aiohttp.ClientError,
# as we don't want a single client failure to invalidate the entire run.
# OSError and ssl errors are all subclassed by aiohttp exceptions.
critical_exception = next(
(
r
for r in responses
if isinstance(
r,
(
KeyringError,
InvalidURLError,
OSError,
ssl.SSLError,
ssl.CertificateError,
),
)
),
None,
)

if critical_exception is not None:
raise critical_exception

type_counts = Counter(type(r) for r in responses)
c_success, c_fail = type_counts[dict], type_counts[aiohttp.ClientError]
print(
f"{c_success}/{len(tasks)} URL(s) processed successfully. ",
f"{c_fail} skipped due to errors.",
)
logger.info(f"{c_success}/{len(tasks)} URL(s) processed successfully. ")
logger.warning(f"{c_fail} skipped due to errors. Removing failed URL(s).")

# Remove failures for response processing
responses = [r for r in responses if type(r) == dict]
return responses
@@ -109,8 +138,11 @@ def get_tasks(
request_urls: list[str], api_args_dict: dict[str, Any]
) -> list[Coroutine]:
"""Creates a list of tasks that call get_response() with request params."""
logger.info("Creating list of tasks based on parsed URL(s).")
key = get_api_key()
tasks = []
for url in request_urls:
api_args_dict["url"] = url
api_args_dict["key"] = key
tasks.append(get_response(**api_args_dict))
return tasks
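
The return_exceptions=True pattern in gather_responses() is what lets a single failed URL be skipped without cancelling the whole batch. A self-contained sketch of that pattern, with illustrative names that are not part of the package:

import asyncio


async def fetch(i: int) -> dict:
    # Simulate one recoverable failure among several requests.
    if i == 2:
        raise ValueError("recoverable failure")  # stands in for aiohttp.ClientError
    return {"id": i}


async def main() -> list[dict]:
    tasks = [fetch(i) for i in range(4)]
    # return_exceptions=True returns exceptions as results instead of raising,
    # so one failure does not invalidate the entire gather.
    responses = await asyncio.gather(*tasks, return_exceptions=True)
    # Drop the failures, keeping only successful dict responses.
    return [r for r in responses if isinstance(r, dict)]


print(asyncio.run(main()))  # [{'id': 0}, {'id': 1}, {'id': 3}]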
45 changes: 38 additions & 7 deletions src/pyspeedinsights/api/response.py
@@ -2,12 +2,16 @@

import copy
import json
import logging
from datetime import datetime
from typing import Optional, Union

from ..cli.choices import COMMAND_CHOICES
from ..utils.exceptions import JSONKeyError
from ..utils.generic import sort_dict_alpha

logger = logging.getLogger(__name__)


def process_json(json_resp: dict, category: str, strategy: str) -> None:
"""Dumps raw json response to a file in the working directory.
@@ -20,7 +24,7 @@ def process_json(json_resp: dict, category: str, strategy: str) -> None:

with open(filename, "w", encoding="utf-8") as f:
json.dump(json_resp, f, ensure_ascii=False, indent=4)
print("JSON processed. Check your current directory.")
logger.info("JSON processed. Check your current working directory.")


def process_excel(
@@ -31,24 +35,38 @@
Called within main() in pyspeedinsights.app.
Metrics results are only included if the category is performance.
"""
audits_base = _get_audits_base(json_resp) # Location of audits in json response
metadata = _parse_metadata(json_resp, category)
audit_results = _parse_audits(audits_base)
json_err = "Malformed JSON response. Skipping Excel processing for URL: "
try:
audits_base = _get_audits_base(json_resp) # Location of audits in json response
metadata = _parse_metadata(json_resp, category)
audit_results = _parse_audits(audits_base)
except JSONKeyError as err:
logger.error(f"{json_err}{err}", exc_info=True)
return {}

if metrics is not None and category == "performance":
metrics_results = _parse_metrics(audits_base, metrics)
try:
metrics_results = _parse_metrics(audits_base, metrics)
except JSONKeyError as err:
logger.error(f"{json_err}{err}", exc_info=True)
return {}
else:
logger.warning("Skipping metrics (not chosen or non-performance category)")
metrics_results = None

results: dict[str, Union[dict, None]] = {
"metadata": metadata,
"audit_results": audit_results,
"metrics_results": metrics_results,
}
logger.info("Response data processed for URL.")
return results


def _parse_metadata(json_resp: dict, category: str) -> dict[str, Union[str, int]]:
"""Parses various metadata from the JSON response to write to Excel."""
logger.info("Parsing metadata from JSON response.")

json_base = json_resp["lighthouseResult"]
strategy = json_base["configSettings"]["formFactor"]
category_score = json_base["categories"][category]["score"]
@@ -72,6 +90,8 @@ def _parse_audits(audits_base: dict) -> dict[str, tuple[Union[int, float]]]:
A dict of audit results with audit names as keys and tuples of length 2
as values containing the audit scores and numeric values, respectively.
"""
logger.info("Parsing audit data from JSON response.")

audit_results = {}
# Create results dict with scores and numerical values for each audit.
for k in audits_base.keys():
@@ -97,11 +117,13 @@ def _parse_metrics(
and int or float metrics as values.
"""
if "all" in metrics:
logger.info("Parsing all metrics.")
metrics_to_use = copy.copy(COMMAND_CHOICES["metrics"])
# Remove 'all' to avoid key errors, as it doesn't exist in JSON resp.
if type(metrics_to_use) == list:
metrics_to_use.remove("all")
else:
logger.info("Parsing chosen metrics.")
metrics_to_use = metrics

# Create new dict of metrics based on user's chosen metrics.
@@ -125,10 +147,19 @@ def _get_timestamp(json_resp: dict) -> str:
Returns:
A str in format year-month-day_hour.minute.second.
"""
timestamp = json_resp["analysisUTCTimestamp"]
logger.info("Parsing timestamp from JSON response.")
time_format = "%Y-%m-%d_%H.%M.%S"

try:
timestamp = json_resp["analysisUTCTimestamp"]
except JSONKeyError:
logger.warning("Unable to parse timestamp. Falling back to local time.")
date = datetime.now().strftime(time_format)
return date

ts_no_fractions = timestamp.split(".")[0] # Remove fraction
if ts_no_fractions[-1] != "Z":
ts_no_fractions += "Z" # Add Z back after fraction removal
dt_object = datetime.strptime(ts_no_fractions, "%Y-%m-%dT%H:%M:%SZ")
date = dt_object.strftime("%Y-%m-%d_%H.%M.%S")
date = dt_object.strftime(time_format)
return date
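
For reference, the fraction-stripping logic in _get_timestamp() behaves like this; the timestamp value below is illustrative, not taken from a real response.

from datetime import datetime

time_format = "%Y-%m-%d_%H.%M.%S"
timestamp = "2023-07-06T12:34:56.789Z"  # example analysisUTCTimestamp value

ts_no_fractions = timestamp.split(".")[0]  # -> "2023-07-06T12:34:56"
if ts_no_fractions[-1] != "Z":
    ts_no_fractions += "Z"  # restore the Z stripped along with the fraction
dt_object = datetime.strptime(ts_no_fractions, "%Y-%m-%dT%H:%M:%SZ")
print(dt_object.strftime(time_format))  # 2023-07-06_12.34.56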