diff --git a/src/pockexport/dal.py b/src/pockexport/dal.py index 38f68cb..83efd4b 100755 --- a/src/pockexport/dal.py +++ b/src/pockexport/dal.py @@ -1,28 +1,43 @@ #!/usr/bin/env python3 +from __future__ import annotations + import json -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path -from typing import Any, Iterator, NamedTuple, Sequence +from typing import Iterator, NamedTuple, Sequence from .exporthelpers import dal_helper -from .exporthelpers.dal_helper import Json, PathIsh, pathify +from .exporthelpers.dal_helper import ( + Json, + PathIsh, + datetime_aware, + fromisoformat, + pathify, +) -# TODO FIXME are times in utc? not mentioned anywhere... class Highlight(NamedTuple): - json: Any + json: Json @property def text(self) -> str: return self.json['quote'] @property - def created(self) -> datetime: - return datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S') + def created(self) -> datetime_aware: + created_at_s = self.json['created_at'] + if created_at_s.endswith('Z'): + # FIXME not convinced timestamp is correct here? + # tested with item highlighted at 2024-09-30 at 00:53 UTC and it appeared as 2024-09-29T19:53:35.000Z in export?? + return fromisoformat(created_at_s) + else: + # older format (pre September 2024) + dt = datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S') + return dt.replace(tzinfo=timezone.utc) class Article(NamedTuple): - json: Any + json: Json @property def url(self) -> str: @@ -41,8 +56,8 @@ def pocket_link(self) -> str: return 'https://app.getpocket.com/read/' + self.json['item_id'] @property - def added(self) -> datetime: - return datetime.fromtimestamp(int(self.json['time_added'])) + def added(self) -> datetime_aware: + return datetime.fromtimestamp(int(self.json['time_added']), tz=timezone.utc) @property def highlights(self) -> Sequence[Highlight]: @@ -64,7 +79,12 @@ def raw(self) -> Json: return json.loads(last.read_text()) def articles(self) -> Iterator[Article]: - yield from map(Article, self.raw()['list'].values()) + for j in self.raw()['list'].values(): + # means "item should be deleted" according to api?? https://getpocket.com/developer/docs/v3/retrieve + # started happening around September 2024... in this case there is no data inside except item id + if j['status'] == '2': + continue + yield Article(j) def _get_test_sources() -> Sequence[PathIsh]: @@ -79,10 +99,10 @@ def test() -> None: articles = list(dal.articles()) assert len(articles) == 10 for a in articles: - assert a.url is not None - assert a.title is not None + assert a.url is not None + assert a.title is not None assert a.pocket_link is not None - assert a.added is not None + assert a.added is not None for h in a.highlights: h.text # noqa: B018 h.created # noqa: B018 diff --git a/src/pockexport/export.py b/src/pockexport/export.py index 4fdc645..d90db6d 100755 --- a/src/pockexport/export.py +++ b/src/pockexport/export.py @@ -1,8 +1,20 @@ #!/usr/bin/env python3 +from __future__ import annotations + import json import pocket # type: ignore +from .exporthelpers.export_helper import Json +from .exporthelpers.logging_helper import make_logger + +## useful for debugging +# from http.client import HTTPConnection +# HTTPConnection.debuglevel = 1 +### + +logger = make_logger(__name__, level='debug') + class Exporter: def __init__(self, *args, **kwargs) -> None: @@ -15,25 +27,47 @@ def export_json(self): def get(self, **kwargs): pass - # apparently no pagination? - res = get( - self.api, - images=1, - videos=1, - tags=1, - rediscovery=1, - annotations=1, - authors=1, - itemOptics=1, - meta=1, - posts=1, - total=1, - forceaccount=1, - state='all', - sort='newest', - detailType='complete', - ) - return res[0] + all_items: dict[str, Json] = {} + + first_res: Json | None = None + total: int | None = None + + while True: + offset = len(all_items) + logger.debug(f'retrieving from {offset=} (expected {total=})') + res, _headers = get( + self.api, + images=1, + videos=1, + tags=1, + rediscovery=1, + annotations=1, + authors=1, + itemOptics=1, + meta=1, + posts=1, + total=1, + forceaccount=1, + offset=offset, + count=30, # max count per request according to api docs + state='all', + sort='newest', + detailType='complete', + ) + if first_res is None: + first_res = res + + assert res.get('error') is None, res # just in case + total = int(res['total']) + + new_items: dict[str, Json] = res['list'] + if len(new_items) == 0: + break + + all_items.update(new_items) + + first_res['list'] = all_items # eh, hacky, but not sure what's a better way + return first_res def get_json(**params): @@ -54,13 +88,15 @@ def main() -> None: def make_parser(): from .exporthelpers.export_helper import Parser, setup_parser + parser = Parser('Export your personal Pocket data, *including highlights* as JSON.') setup_parser( parser=parser, params=['consumer_key', 'access_token'], extra_usage=''' You can also import ~pockexport.export~ as a module and call ~get_json~ function directly to get raw JSON. -''') +''', + ) return parser diff --git a/src/pockexport/exporthelpers b/src/pockexport/exporthelpers index e907539..96b6cdf 160000 --- a/src/pockexport/exporthelpers +++ b/src/pockexport/exporthelpers @@ -1 +1 @@ -Subproject commit e907539f4512a297a6a777794ec9988bc361da2d +Subproject commit 96b6cdf231ed11a1b12983037137f93aa20562df