Cache processed activities
hugovk committed Feb 9, 2024
1 parent 3f05e06 commit 2cc2364
Showing 2 changed files with 35 additions and 4 deletions.
src/stravavis/cli.py (2 changes: 1 addition & 1 deletion)
@@ -89,7 +89,7 @@ def main():
     if os.path.isdir(args.path):
         args.path = os.path.join(args.path, "*")

-    filenames = glob.glob(args.path)
+    filenames = sorted(glob.glob(args.path))
     if not filenames:
         sys.exit(f"No files found matching {args.path}")

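Note: the sort is what makes the new cache in process_data.py (below) reliable. The cache key is an MD5 hash of the joined filenames, and glob.glob() does not guarantee any particular ordering, so an unsorted listing could produce a different key on every run. A minimal sketch of the key derivation, with made-up filenames for illustration:

import hashlib

# Same files, different order: different key, i.e. a cache miss.
key_a = hashlib.md5("".join(["a.gpx", "b.fit"]).encode("utf-8")).hexdigest()
key_b = hashlib.md5("".join(["b.fit", "a.gpx"]).encode("utf-8")).hexdigest()
assert key_a != key_b

# Sorting first keeps the key stable across runs.
key_1 = hashlib.md5("".join(sorted(["a.gpx", "b.fit"])).encode("utf-8")).hexdigest()
key_2 = hashlib.md5("".join(sorted(["b.fit", "a.gpx"])).encode("utf-8")).hexdigest()
assert key_1 == key_2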
src/stravavis/process_data.py (37 changes: 34 additions & 3 deletions)
@@ -1,15 +1,19 @@
 from __future__ import annotations

 import glob
+import hashlib
 import math
+import tempfile
 from multiprocessing import Pool
+from pathlib import Path

 import fit2gpx
 import gpxpy
 import pandas as pd
 from rich.progress import track


-def process_file(fpath):
+def process_file(fpath: str) -> pd.DataFrame | None:
     if fpath.endswith(".gpx"):
         return process_gpx(fpath)
     elif fpath.endswith(".fit"):
@@ -18,7 +22,7 @@ def process_file(fpath):

 # Function for processing an individual GPX file
 # Ref: https://pypi.org/project/gpxpy/
-def process_gpx(gpxfile):
+def process_gpx(gpxfile: str) -> pd.DataFrame | None:
     with open(gpxfile, encoding="utf-8") as f:
         try:
             activity = gpxpy.parse(f)
@@ -64,7 +68,7 @@ def process_gpx(gpxfile):

 # Function for processing an individual FIT file
 # Ref: https://github.com/dodo-saba/fit2gpx
-def process_fit(fitfile):
+def process_fit(fitfile: str) -> pd.DataFrame:
     conv = fit2gpx.Converter()
     df_lap, df = conv.fit_to_dataframes(fname=fitfile)

@@ -101,9 +105,33 @@ def process_fit(fitfile):
     return df


+def load_cache(filenames: list[str]) -> tuple[Path, pd.DataFrame | None]:
+    # Create a cache key from the filenames
+    key = hashlib.md5("".join(filenames).encode("utf-8")).hexdigest()
+
+    # Create a cache directory
+    dir_name = Path(tempfile.gettempdir()) / "stravavis"
+    dir_name.mkdir(parents=True, exist_ok=True)
+    cache_filename = dir_name / f"cached_activities_{key}.pkl"
+    print(f"Cache filename: {cache_filename}")
+
+    # Load cache if it exists
+    try:
+        df = pd.read_pickle(cache_filename)
+        print("Loaded cached activities")
+        return cache_filename, df
+    except FileNotFoundError:
+        print("Cache not found")
+        return cache_filename, None
+
+
 # Function for processing (unzipped) GPX and FIT files in a directory (path)
 def process_data(filenames: list[str]) -> pd.DataFrame:
     # Process all files (GPX or FIT)
+    cache_filename, df = load_cache(filenames)
+    if df is not None:
+        return df
+
     with Pool() as pool:
         try:
             it = pool.imap_unordered(process_file, filenames)
@@ -117,4 +145,7 @@ def process_data(filenames: list[str]) -> pd.DataFrame:

     df["time"] = pd.to_datetime(df["time"], utc=True)

+    # Save cache
+    df.to_pickle(cache_filename)
+
     return df

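For reference, a minimal self-contained sketch of the pickle round-trip the new cache relies on. The cache path mirrors the one built in load_cache(); the DataFrame columns are illustrative, not the exact schema produced by process_file():

import tempfile
from pathlib import Path

import pandas as pd

cache = Path(tempfile.gettempdir()) / "stravavis" / "cached_activities_demo.pkl"
cache.parent.mkdir(parents=True, exist_ok=True)

df = pd.DataFrame({"lon": [5.97], "lat": [45.56], "time": ["2024-02-09T10:00:00Z"]})
df["time"] = pd.to_datetime(df["time"], utc=True)

df.to_pickle(cache)               # what process_data() now does after processing
restored = pd.read_pickle(cache)  # what load_cache() does on the next run
assert restored.equals(df)

On a later run with the same sorted filenames, load_cache() finds this pickle and process_data() returns it without re-parsing any GPX or FIT files.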