Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix breaking changes of pocket api #13

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions src/pockexport/dal.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,43 @@
#!/usr/bin/env python3
from __future__ import annotations

import json
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterator, NamedTuple, Sequence
from typing import Iterator, NamedTuple, Sequence

from .exporthelpers import dal_helper
from .exporthelpers.dal_helper import Json, PathIsh, pathify
from .exporthelpers.dal_helper import (
Json,
PathIsh,
datetime_aware,
fromisoformat,
pathify,
)

# TODO FIXME: are the times in UTC? This isn't mentioned anywhere in the API docs...

class Highlight(NamedTuple):
json: Any
json: Json

@property
def text(self) -> str:
return self.json['quote']

@property
def created(self) -> datetime:
return datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S')
def created(self) -> datetime_aware:
created_at_s = self.json['created_at']
if created_at_s.endswith('Z'):
# FIXME: not convinced the timestamp is correct here.
# Tested with an item highlighted on 2024-09-30 at 00:53 UTC, and it appeared as 2024-09-29T19:53:35.000Z in the export,
# i.e. 5 hours earlier than expected despite the 'Z' (UTC) suffix.
return fromisoformat(created_at_s)
else:
# older format (pre September 2024)
dt = datetime.strptime(self.json['created_at'], '%Y-%m-%d %H:%M:%S')
return dt.replace(tzinfo=timezone.utc)


class Article(NamedTuple):
json: Any
json: Json

@property
def url(self) -> str:
Expand All @@ -41,8 +56,8 @@ def pocket_link(self) -> str:
return 'https://app.getpocket.com/read/' + self.json['item_id']

@property
def added(self) -> datetime:
return datetime.fromtimestamp(int(self.json['time_added']))
def added(self) -> datetime_aware:
return datetime.fromtimestamp(int(self.json['time_added']), tz=timezone.utc)

@property
def highlights(self) -> Sequence[Highlight]:
Expand All @@ -64,7 +79,12 @@ def raw(self) -> Json:
return json.loads(last.read_text())

def articles(self) -> Iterator[Article]:
yield from map(Article, self.raw()['list'].values())
for j in self.raw()['list'].values():
# status '2' means "item should be deleted" according to the API docs: https://getpocket.com/developer/docs/v3/retrieve
# Such entries started appearing around September 2024; in this case there is no data inside except the item id, so skip them.
if j['status'] == '2':
continue
yield Article(j)


def _get_test_sources() -> Sequence[PathIsh]:
Expand All @@ -79,10 +99,10 @@ def test() -> None:
articles = list(dal.articles())
assert len(articles) == 10
for a in articles:
assert a.url is not None
assert a.title is not None
assert a.url is not None
assert a.title is not None
assert a.pocket_link is not None
assert a.added is not None
assert a.added is not None
for h in a.highlights:
h.text # noqa: B018
h.created # noqa: B018
Expand Down
76 changes: 56 additions & 20 deletions src/pockexport/export.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
#!/usr/bin/env python3
from __future__ import annotations

import json

import pocket # type: ignore

from .exporthelpers.export_helper import Json
from .exporthelpers.logging_helper import make_logger

## useful for debugging
# from http.client import HTTPConnection
# HTTPConnection.debuglevel = 1
###

logger = make_logger(__name__, level='debug')


class Exporter:
def __init__(self, *args, **kwargs) -> None:
Expand All @@ -15,25 +27,47 @@ def export_json(self):
def get(self, **kwargs):
pass

# apparently no pagination?
res = get(
self.api,
images=1,
videos=1,
tags=1,
rediscovery=1,
annotations=1,
authors=1,
itemOptics=1,
meta=1,
posts=1,
total=1,
forceaccount=1,
state='all',
sort='newest',
detailType='complete',
)
return res[0]
all_items: dict[str, Json] = {}

first_res: Json | None = None
total: int | None = None

while True:
offset = len(all_items)
logger.debug(f'retrieving from {offset=} (expected {total=})')
res, _headers = get(
self.api,
images=1,
videos=1,
tags=1,
rediscovery=1,
annotations=1,
authors=1,
itemOptics=1,
meta=1,
posts=1,
total=1,
forceaccount=1,
offset=offset,
count=30, # max count per request according to api docs
state='all',
sort='newest',
detailType='complete',
)
if first_res is None:
first_res = res

assert res.get('error') is None, res # just in case
total = int(res['total'])

new_items: dict[str, Json] = res['list']
if len(new_items) == 0:
break

all_items.update(new_items)

first_res['list'] = all_items # eh, hacky, but not sure what's a better way
return first_res


def get_json(**params):
Expand All @@ -54,13 +88,15 @@ def main() -> None:

def make_parser():
from .exporthelpers.export_helper import Parser, setup_parser

parser = Parser('Export your personal Pocket data, *including highlights* as JSON.')
setup_parser(
parser=parser,
params=['consumer_key', 'access_token'],
extra_usage='''
You can also import ~pockexport.export~ as a module and call ~get_json~ function directly to get raw JSON.
''')
''',
)
return parser


Expand Down
2 changes: 1 addition & 1 deletion src/pockexport/exporthelpers
Submodule exporthelpers updated 1 files
+13 −0 dal_helper.py
Loading