This repository has been archived by the owner on Aug 11, 2023. It is now read-only.

Improve exception handling, add logging and bug fixes #16

Merged 13 commits on Jul 6, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -26,3 +26,4 @@ build/
# Package specific
psi*.json
psi*.xlsx
psi.log
1 change: 0 additions & 1 deletion README.md
@@ -214,7 +214,6 @@ Example:

Full list of available metrics:

* `observedTotalCumulativeLayoutShift`
* `observedCumulativeLayoutShift`
* `observedLargestContentfulPaintAllFrames`
* `maxPotentialFID`
33 changes: 27 additions & 6 deletions src/pyspeedinsights/api/keys.py
@@ -7,9 +7,21 @@
If no key is set in keyring, the user will be manually prompted for their key.
"""

import logging

import keyring
from keyring.errors import KeyringError

logger = logging.getLogger(__name__)


class RetryLimitExceededError(KeyringError):
"""Exception if the user has exceeded the retry limit for entering an API key."""


class InputTerminatedError(KeyringError):
"""Exception if the user manually terminates retry for entering an API key."""


def get_api_key() -> str:
"""Gets the user's PSI API key from their keyring store.
@@ -21,29 +33,38 @@ def get_api_key() -> str:
Returns:
A str representing the PSI API key.
Raises:
SystemExit: The user terminated the reprompt.
InputTerminatedError: The user terminated the reprompt.
RetryLimitExceededError: The reprompt limit was exceeded.
"""
logger.info("Attempting to retrieve API key from keystore.")
try:
# get_password() returns None for an empty key
psi_api_key = keyring.get_password("system", "psikey")
except KeyringError:
print("There was an error retrieving your API key from the keystore.")
except KeyringError as err:
logger.error(
f"There was an error retrieving your API key from the keystore: {err}",
exc_info=True,
)
psi_api_key = None

if psi_api_key is None:
psi_api_key = input("No API key found. Enter it manually:\n")
logger.warning("No API key found. Defaulting to manual input.")
psi_api_key = input("No API key found. Enter your key manually:\n")

retry_limit = 5
while not psi_api_key:
logger.warning("Empty API key supplied. Reprompting user for key.")
reprompt = input(
"Empty API key supplied. Please re-enter your key or Q to quit:\n"
)
# Below errors logged as CRITICAL in main() before exit
if reprompt in ("Q", "q"):
raise SystemExit
raise InputTerminatedError("API key input cancelled.")
elif retry_limit < 1:
raise SystemExit("Retry limit exceeded.")
raise RetryLimitExceededError("Retry limit for entering API key exceeded.")
else:
psi_api_key = reprompt
retry_limit -= 1

logger.info("API key retrieval successful.")
return psi_api_key
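
Note: the diff's comment says the new exceptions are logged as CRITICAL in main() before exit. A minimal sketch of a caller handling them might look like the following; the run() function and its logging details are illustrative assumptions, not the package's actual entry point.

import logging
import sys

from pyspeedinsights.api.keys import (
    InputTerminatedError,
    RetryLimitExceededError,
    get_api_key,
)

logger = logging.getLogger(__name__)


def run() -> None:
    try:
        key = get_api_key()
    except (InputTerminatedError, RetryLimitExceededError) as err:
        # Both exceptions subclass KeyringError; log as CRITICAL and exit,
        # mirroring the behavior the PR comment describes for main().
        logger.critical(err, exc_info=True)
        sys.exit(1)
    logger.debug("API key retrieved (%d chars).", len(key))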
64 changes: 48 additions & 16 deletions src/pyspeedinsights/api/request.py
@@ -1,19 +1,23 @@
"""Async request preparation and processing for PSI API calls."""

import asyncio
import logging
import ssl
from collections import Counter
from typing import Any, Coroutine, Optional, Union

import aiohttp

from ..utils.generic import remove_nonetype_dict_items
from ..utils.urls import validate_url
from .keys import get_api_key
from ..utils.urls import InvalidURLError, validate_url
from .keys import KeyringError, get_api_key

logger = logging.getLogger(__name__)
next_delay = 1 # Global for applying a 1s delay between requests


async def get_response(
key: str,
url: str,
category: Optional[str] = None,
locale: Optional[str] = None,
@@ -34,6 +38,7 @@ async def get_response(
base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
url = validate_url(url)
params = {
"key": key,
"url": url,
"category": category,
"locale": locale,
@@ -44,15 +49,15 @@
}
# Use API defaults instead of passing None values as query params.
params = remove_nonetype_dict_items(params)
params["key"] = get_api_key()
req_url = params["url"]

# Add a 1s delay between calls to avoid 500 errors from server.
global next_delay
next_delay += 1
logger.debug("Sleeping request to prevent server errors.")
await asyncio.sleep(next_delay)

print(f"Sending request... ({req_url})")
logger.info(f"Sending request... ({req_url})")
# Make async call with query params to PSI API and await response.
async with aiohttp.ClientSession() as session:
async with session.get(url=base_url, params=params) as resp:
@@ -62,18 +67,18 @@
try:
resp.raise_for_status()
json_resp = await resp.json()
print(f"Request successful! ({req_url})")
logger.info(f"Request successful! ({req_url})")
except aiohttp.ClientError as err_c:
if retry_attempts < 1:
print(err_c)
print(f"Retry limit reached. Skipping ({req_url})")
logger.error(err_c, exc_info=True)
logger.warning(
f"Retry limit for URL reached. Skipping ({req_url})"
)
raise aiohttp.ClientError(err_c)
else:
retry_attempts -= 1
print(
"Request failed. Retrying... ",
f"{retry_attempts} retries left ({req_url})",
)
logger.warning("Request failed. Retrying.")
logger.info(f"{retry_attempts} retries left ({req_url})")
await asyncio.sleep(1)
return json_resp

@@ -91,15 +96,39 @@ def run_requests(

async def gather_responses(tasks: list[Coroutine]) -> list[dict]:
"""Gathers tasks and awaits the return of the responses for processing."""
print(f"Preparing {len(tasks)} URL(s)...")
logger.info(f"Gathering {len(tasks)} URL(s) and scheduling tasks.")
responses = await asyncio.gather(*tasks, return_exceptions=True)

# Generator expression for bubbling up critical exceptions (handled in main())
# Purposefully explicit here to avoid raising exceptions for aiohttp.ClientError,
# as we don't want a single client failure to invalidate the entire run.
# OSError and ssl errors are all subclassed by aiohttp exceptions.
critical_exception = next(
(
r
for r in responses
if isinstance(
r,
(
KeyringError,
InvalidURLError,
OSError,
ssl.SSLError,
ssl.CertificateError,
),
)
),
None,
)

if critical_exception is not None:
raise critical_exception

type_counts = Counter(type(r) for r in responses)
c_success, c_fail = type_counts[dict], type_counts[aiohttp.ClientError]
print(
f"{c_success}/{len(tasks)} URL(s) processed successfully. ",
f"{c_fail} skipped due to errors.",
)
logger.info(f"{c_success}/{len(tasks)} URL(s) processed successfully. ")
logger.warning(f"{c_fail} skipped due to errors. Removing failed URL(s).")

# Remove failures for response processing
responses = [r for r in responses if type(r) == dict]
return responses
@@ -109,8 +138,11 @@ def get_tasks(
request_urls: list[str], api_args_dict: dict[str, Any]
) -> list[Coroutine]:
"""Creates a list of tasks that call get_response() with request params."""
logger.info("Creating list of tasks based on parsed URL(s).")
key = get_api_key()
tasks = []
for url in request_urls:
api_args_dict["url"] = url
api_args_dict["key"] = key
tasks.append(get_response(**api_args_dict))
return tasks
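
The return_exceptions=True pattern in gather_responses() is what lets a single failed URL be skipped without cancelling the whole batch. A self-contained sketch of that pattern, with illustrative names that are not part of the package:

import asyncio


async def fetch(i: int) -> dict:
    # Simulate one recoverable failure among several requests.
    if i == 2:
        raise ValueError("recoverable failure")  # stands in for aiohttp.ClientError
    return {"id": i}


async def main() -> list[dict]:
    tasks = [fetch(i) for i in range(4)]
    # return_exceptions=True returns exceptions as results instead of raising,
    # so one failure does not invalidate the entire gather.
    responses = await asyncio.gather(*tasks, return_exceptions=True)
    # Drop the failures, keeping only successful dict responses.
    return [r for r in responses if isinstance(r, dict)]


print(asyncio.run(main()))  # [{'id': 0}, {'id': 1}, {'id': 3}]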
45 changes: 38 additions & 7 deletions src/pyspeedinsights/api/response.py
@@ -2,12 +2,16 @@

import copy
import json
import logging
from datetime import datetime
from typing import Optional, Union

from ..cli.choices import COMMAND_CHOICES
from ..utils.exceptions import JSONKeyError
from ..utils.generic import sort_dict_alpha

logger = logging.getLogger(__name__)


def process_json(json_resp: dict, category: str, strategy: str) -> None:
"""Dumps raw json response to a file in the working directory.
@@ -20,7 +24,7 @@ def process_json(json_resp: dict, category: str, strategy: str) -> None:

with open(filename, "w", encoding="utf-8") as f:
json.dump(json_resp, f, ensure_ascii=False, indent=4)
print("JSON processed. Check your current directory.")
logger.info("JSON processed. Check your current working directory.")


def process_excel(
@@ -31,24 +35,38 @@
Called within main() in pyspeedinsights.app.
Metrics results are only included if the category is performance.
"""
audits_base = _get_audits_base(json_resp) # Location of audits in json response
metadata = _parse_metadata(json_resp, category)
audit_results = _parse_audits(audits_base)
json_err = "Malformed JSON response. Skipping Excel processing for URL: "
try:
audits_base = _get_audits_base(json_resp) # Location of audits in json response
metadata = _parse_metadata(json_resp, category)
audit_results = _parse_audits(audits_base)
except JSONKeyError as err:
logger.error(f"{json_err}{err}", exc_info=True)
return {}

if metrics is not None and category == "performance":
metrics_results = _parse_metrics(audits_base, metrics)
try:
metrics_results = _parse_metrics(audits_base, metrics)
except JSONKeyError as err:
logger.error(f"{json_err}{err}", exc_info=True)
return {}
else:
logger.warning("Skipping metrics (not chosen or non-performance category)")
metrics_results = None

results: dict[str, Union[dict, None]] = {
"metadata": metadata,
"audit_results": audit_results,
"metrics_results": metrics_results,
}
logger.info("Response data processed for URL.")
return results


def _parse_metadata(json_resp: dict, category: str) -> dict[str, Union[str, int]]:
"""Parses various metadata from the JSON response to write to Excel."""
logger.info("Parsing metadata from JSON response.")

json_base = json_resp["lighthouseResult"]
strategy = json_base["configSettings"]["formFactor"]
category_score = json_base["categories"][category]["score"]
@@ -72,6 +90,8 @@ def _parse_audits(audits_base: dict) -> dict[str, tuple[Union[int, float]]]:
A dict of audit results with audit names as keys and tuples of length 2
as values containing the audit scores and numeric values, respectively.
"""
logger.info("Parsing audit data from JSON response.")

audit_results = {}
# Create results dict with scores and numerical values for each audit.
for k in audits_base.keys():
@@ -97,11 +117,13 @@ def _parse_metrics(
and int or float metrics as values.
"""
if "all" in metrics:
logger.info("Parsing all metrics.")
metrics_to_use = copy.copy(COMMAND_CHOICES["metrics"])
# Remove 'all' to avoid key errors, as it doesn't exist in JSON resp.
if type(metrics_to_use) == list:
metrics_to_use.remove("all")
else:
logger.info("Parsing chosen metrics.")
metrics_to_use = metrics

# Create new dict of metrics based on user's chosen metrics.
@@ -125,10 +147,19 @@ def _get_timestamp(json_resp: dict) -> str:
Returns:
A str in format year-month-day_hour.minute.second.
"""
timestamp = json_resp["analysisUTCTimestamp"]
logger.info("Parsing timestamp from JSON response.")
time_format = "%Y-%m-%d_%H.%M.%S"

try:
timestamp = json_resp["analysisUTCTimestamp"]
except JSONKeyError:
logger.warning("Unable to parse timestamp. Falling back to local time.")
date = datetime.now().strftime(time_format)
return date

ts_no_fractions = timestamp.split(".")[0] # Remove fraction
if ts_no_fractions[-1] != "Z":
ts_no_fractions += "Z" # Add Z back after fraction removal
dt_object = datetime.strptime(ts_no_fractions, "%Y-%m-%dT%H:%M:%SZ")
date = dt_object.strftime("%Y-%m-%d_%H.%M.%S")
date = dt_object.strftime(time_format)
return date
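
For reference, the fraction-stripping logic in _get_timestamp() behaves like this; the timestamp value below is illustrative, not taken from a real response.

from datetime import datetime

time_format = "%Y-%m-%d_%H.%M.%S"
timestamp = "2023-07-06T12:34:56.789Z"  # example analysisUTCTimestamp value

ts_no_fractions = timestamp.split(".")[0]  # -> "2023-07-06T12:34:56"
if ts_no_fractions[-1] != "Z":
    ts_no_fractions += "Z"  # restore the Z stripped along with the fraction
dt_object = datetime.strptime(ts_no_fractions, "%Y-%m-%dT%H:%M:%SZ")
print(dt_object.strftime(time_format))  # 2023-07-06_12.34.56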