From a2fa29b95746605e1cf4b9d409235fb5cbbafab7 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 13 Sep 2024 01:18:40 +0100 Subject: [PATCH] my.google.takeout.parser: speedup event merging on newer google_takeout_parser versions --- my/google/takeout/parser.py | 19 +++++++++++++++---- my/youtube/takeout.py | 12 ++++++------ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/my/google/takeout/parser.py b/my/google/takeout/parser.py index 258ab96e..170553ae 100644 --- a/my/google/takeout/parser.py +++ b/my/google/takeout/parser.py @@ -31,6 +31,7 @@ import google_takeout_parser from google_takeout_parser.path_dispatch import TakeoutParser from google_takeout_parser.merge import GoogleEventSet, CacheResults +from google_takeout_parser.models import BaseEvent # see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example from my.config import google as user_config @@ -95,6 +96,17 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults: error_policy = config.error_policy count = 0 emitted = GoogleEventSet() + + try: + emitted_add = emitted.add_if_not_present + except AttributeError: + # compat for older versions of google_takeout_parser which didn't have this method + def emitted_add(other: BaseEvent) -> bool: + if other in emitted: + return False + emitted.add(other) + return True + # reversed shouldn't really matter? but logic is to use newer # takeouts if they're named according to date, since JSON Activity # is nicer than HTML Activity @@ -123,10 +135,9 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults: elif error_policy == 'drop': pass continue - if event in emitted: - continue - emitted.add(event) - yield event # type: ignore[misc] + + if emitted_add(event): + yield event # type: ignore[misc] logger.debug( f"HPI Takeout merge: from a total of {count} events, removed {count - len(emitted)} duplicates" ) diff --git a/my/youtube/takeout.py b/my/youtube/takeout.py index 99d65d93..284c082f 100644 --- a/my/youtube/takeout.py +++ b/my/youtube/takeout.py @@ -1,10 +1,10 @@ from typing import NamedTuple, List, Iterable, TYPE_CHECKING -from ..core import datetime_aware, Res, LazyLogger -from ..core.compat import removeprefix +from my.core import datetime_aware, make_logger, stat, Res, Stats +from my.core.compat import deprecated, removeprefix -logger = LazyLogger(__name__) +logger = make_logger(__name__) class Watched(NamedTuple): @@ -93,7 +93,6 @@ def watched() -> Iterable[Res[Watched]]: ) -from ..core import stat, Stats def stats() -> Stats: return stat(watched) @@ -101,8 +100,9 @@ def stats() -> Stats: ### deprecated stuff (keep in my.media.youtube) if not TYPE_CHECKING: - # "deprecate" by hiding from mypy - get_watched = watched + @deprecated("use 'watched' instead") + def get_watched(*args, **kwargs): + return watched(*args, **kwargs) def _watched_legacy() -> Iterable[Watched]: