From 2cc236471c828a8b7c4ff11243e7b5c034252a79 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sun, 14 Jan 2024 13:13:01 +0200 Subject: [PATCH] Cache processed activities --- src/stravavis/cli.py | 2 +- src/stravavis/process_data.py | 37 ++++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/stravavis/cli.py b/src/stravavis/cli.py index 3b86615..d0e4a77 100644 --- a/src/stravavis/cli.py +++ b/src/stravavis/cli.py @@ -89,7 +89,7 @@ def main(): if os.path.isdir(args.path): args.path = os.path.join(args.path, "*") - filenames = glob.glob(args.path) + filenames = sorted(glob.glob(args.path)) if not filenames: sys.exit(f"No files found matching {args.path}") diff --git a/src/stravavis/process_data.py b/src/stravavis/process_data.py index 5f99b53..4881ad2 100644 --- a/src/stravavis/process_data.py +++ b/src/stravavis/process_data.py @@ -1,7 +1,11 @@ from __future__ import annotations +import glob +import hashlib import math +import tempfile from multiprocessing import Pool +from pathlib import Path import fit2gpx import gpxpy @@ -9,7 +13,7 @@ from rich.progress import track -def process_file(fpath): +def process_file(fpath: str) -> pd.DataFrame | None: if fpath.endswith(".gpx"): return process_gpx(fpath) elif fpath.endswith(".fit"): @@ -18,7 +22,7 @@ def process_file(fpath): # Function for processing an individual GPX file # Ref: https://pypi.org/project/gpxpy/ -def process_gpx(gpxfile): +def process_gpx(gpxfile: str) -> pd.DataFrame | None: with open(gpxfile, encoding="utf-8") as f: try: activity = gpxpy.parse(f) @@ -64,7 +68,7 @@ def process_gpx(gpxfile): # Function for processing an individual FIT file # Ref: https://github.com/dodo-saba/fit2gpx -def process_fit(fitfile): +def process_fit(fitfile: str) -> pd.DataFrame: conv = fit2gpx.Converter() df_lap, df = conv.fit_to_dataframes(fname=fitfile) @@ -101,9 +105,33 @@ def process_fit(fitfile): return df +def load_cache(filenames: list[str]) -> tuple[Path, pd.DataFrame | None]: + # Create a cache key from the filenames + key = hashlib.md5("".join(filenames).encode("utf-8")).hexdigest() + + # Create a cache directory + dir_name = Path(tempfile.gettempdir()) / "stravavis" + dir_name.mkdir(parents=True, exist_ok=True) + cache_filename = dir_name / f"cached_activities_{key}.pkl" + print(f"Cache filename: {cache_filename}") + + # Load cache if it exists + try: + df = pd.read_pickle(cache_filename) + print("Loaded cached activities") + return cache_filename, df + except FileNotFoundError: + print("Cache not found") + return cache_filename, None + + # Function for processing (unzipped) GPX and FIT files in a directory (path) def process_data(filenames: list[str]) -> pd.DataFrame: # Process all files (GPX or FIT) + cache_filename, df = load_cache(filenames) + if df is not None: + return df + with Pool() as pool: try: it = pool.imap_unordered(process_file, filenames) @@ -117,4 +145,7 @@ def process_data(filenames: list[str]) -> pd.DataFrame: df["time"] = pd.to_datetime(df["time"], utc=True) + # Save cache + df.to_pickle(cache_filename) + return df