Skip to content

Commit

Permalink
my.google.takeout.parser: speedup event merging on newer google_takeout_parser versions
Browse files Browse the repository at this point in the history
  • Loading branch information
karlicoss committed Sep 13, 2024
1 parent 71fdeca commit a2fa29b
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
19 changes: 15 additions & 4 deletions my/google/takeout/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import google_takeout_parser
from google_takeout_parser.path_dispatch import TakeoutParser
from google_takeout_parser.merge import GoogleEventSet, CacheResults
from google_takeout_parser.models import BaseEvent

# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
from my.config import google as user_config
Expand Down Expand Up @@ -95,6 +96,17 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
error_policy = config.error_policy
count = 0
emitted = GoogleEventSet()

# Resolve the "add unless already seen" operation once, so the merge loop
# below can call a single function regardless of the installed library version.
try:
    emitted_add = emitted.add_if_not_present
except AttributeError:
    # compat for older versions of google_takeout_parser which didn't have
    # this method: emulate it with a membership test followed by add()
    def emitted_add(other: BaseEvent) -> bool:
        """Add ``other`` to ``emitted``; return True only if it was not already present."""
        is_new = other not in emitted
        if is_new:
            emitted.add(other)
        return is_new

# reversed shouldn't really matter? but logic is to use newer
# takeouts if they're named according to date, since JSON Activity
# is nicer than HTML Activity
Expand Down Expand Up @@ -123,10 +135,9 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
elif error_policy == 'drop':
pass
continue
if event in emitted:
continue
emitted.add(event)
yield event # type: ignore[misc]

if emitted_add(event):
yield event # type: ignore[misc]
logger.debug(
f"HPI Takeout merge: from a total of {count} events, removed {count - len(emitted)} duplicates"
)
Expand Down
12 changes: 6 additions & 6 deletions my/youtube/takeout.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import NamedTuple, List, Iterable, TYPE_CHECKING

from ..core import datetime_aware, Res, LazyLogger
from ..core.compat import removeprefix
from my.core import datetime_aware, make_logger, stat, Res, Stats
from my.core.compat import deprecated, removeprefix


logger = LazyLogger(__name__)
logger = make_logger(__name__)


class Watched(NamedTuple):
Expand Down Expand Up @@ -93,16 +93,16 @@ def watched() -> Iterable[Res[Watched]]:
)


from ..core import stat, Stats
def stats() -> Stats:
    """Return the result of running ``stat`` over the ``watched`` provider."""
    return stat(watched)


### deprecated stuff (keep in my.media.youtube)

if not TYPE_CHECKING:
# "deprecate" by hiding from mypy
get_watched = watched
@deprecated("use 'watched' instead")
def get_watched(*args, **kwargs):
    """Deprecated alias kept for old callers; forwards all arguments to ``watched``."""
    return watched(*args, **kwargs)


def _watched_legacy() -> Iterable[Watched]:
Expand Down

0 comments on commit a2fa29b

Please sign in to comment.