Skip to content

Commit

Permalink
my.twitter.archive: cleanup linting and use proper configuration via abstract class
Browse files Browse the repository at this point in the history
  • Loading branch information
karlicoss committed Sep 22, 2024
1 parent 02f8fef commit 82de58d
Showing 1 changed file with 44 additions and 41 deletions.
85 changes: 44 additions & 41 deletions my/twitter/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,73 +2,75 @@
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
"""

from __future__ import annotations

# before this config was named 'twitter', doesn't make too much sense for archive
# todo unify with other code like this, e.g. time.tz.via_location
# Resolve the user's config class as `user_config`: prefer
# my.config.twitter_archive and fall back to the legacy my.config.twitter name.
try:
from my.config import twitter_archive as user_config
except ImportError as ie:
# Only fall back when the failure is specifically the missing
# 'twitter_archive' name in my.config; anything else is re-raised untouched.
if not (ie.name == 'my.config' and 'twitter_archive' in str(ie)):
# must be caused by something else
raise ie
try:
from my.config import twitter as user_config # type: ignore[assignment]
except ImportError:
raise ie # raise the original exception.. must be something else # noqa: B904
else:
# The legacy name was found: keep working, but tell the user to rename it.
from my.core import warnings
warnings.high('my.config.twitter is deprecated! Please rename it to my.config.twitter_archive in your config')
##


import html
import json # hmm interesting enough, orjson didn't give much speedup here?
from abc import abstractmethod
from dataclasses import dataclass
from datetime import datetime
from functools import cached_property
from itertools import chain
import json # hmm interesting enough, orjson didn't give much speedup here?
from pathlib import Path
from functools import cached_property
import html
from typing import (
TYPE_CHECKING,
Iterator,
List,
Optional,
Sequence,
)

from more_itertools import unique_everseen

from my.core import (
datetime_aware,
get_files,
make_logger,
stat,
Json,
Paths,
Res,
Stats,
datetime_aware,
get_files,
make_logger,
stat,
warnings,
)
from my.core import warnings
from my.core.cfg import make_config
from my.core.serialize import dumps as json_dumps

from .common import TweetId, permalink

logger = make_logger(__name__)

# Config schema for this module: a dataclass layered on top of the user's
# config class (`user_config`), declaring the single `export_path` setting.
@dataclass
class twitter_archive(user_config):
export_path: Paths # path[s]/glob to the twitter archive takeout

# Configuration interface for this module: lists the attributes the user's
# config is expected to provide.
# NOTE(review): no ABCMeta/ABC base is visible here, so @abstractmethod by
# itself presumably does not block instantiation; the NotImplementedError below
# is what fires if the property is never overridden -- confirm.
class config:
@property
@abstractmethod
def export_path(self) -> Paths:
"""path[s]/glob to the twitter archive takeout"""
raise NotImplementedError

###

config = make_config(twitter_archive)
# Build the effective config object: locate the user's config class (new name
# first, legacy name as fallback), combine it with the `config` interface class
# and return an instance of the combined class.
# Raises ImportError when neither my.config.twitter_archive nor
# my.config.twitter can be imported (the original exception is re-raised).
def make_config() -> config:
# before this config was named 'twitter', doesn't make too much sense for archive
# todo unify with other code like this, e.g. time.tz.via_location
try:
from my.config import twitter_archive as user_config
except ImportError as ie:
# Only fall back when the failure is specifically the missing
# 'twitter_archive' name in my.config.
if not (ie.name == 'my.config' and 'twitter_archive' in str(ie)):
# must be caused by something else
raise ie
try:
from my.config import twitter as user_config # type: ignore[assignment]
except ImportError:
raise ie # raise the original exception.. must be something else # noqa: B904
else:
# The legacy name was found: keep working, but emit a deprecation warning.
warnings.high('my.config.twitter is deprecated! Please rename it to my.config.twitter_archive in your config')
##

# user_config is listed first in the bases, so attributes it defines take
# precedence over the abstract placeholders declared on `config`.
class combined_config(user_config, config):
pass

# NOTE(review): in this diff rendering the next line looks like the removed
# module-level logger assignment (a logger is also created right after the
# imports) -- confirm before treating it as part of this function.
logger = make_logger(__name__)
return combined_config()


# Return the archive paths obtained by passing the configured `export_path`
# to get_files.
# NOTE(review): two return lines appear because this is a diff view; presumably
# the first (module-level `config`) is the removed version and the second
# (`make_config()` called on each invocation) is its replacement -- confirm.
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
return get_files(make_config().export_path)


# TODO make sure it's not used anywhere else and simplify interface
Expand Down Expand Up @@ -121,7 +123,7 @@ def text(self) -> str:
return res

# Collect the 'expanded_url' field of every entry under the 'urls' key of
# self.entities.
# NOTE(review): the signature is shown twice because this is a diff view; the
# two lines differ only in the return annotation (typing.List vs builtin list).
@property
def urls(self) -> List[str]:
def urls(self) -> list[str]:
ents = self.entities
us = ents['urls']
return [u['expanded_url'] for u in us]
Expand Down Expand Up @@ -162,10 +164,10 @@ def id_str(self) -> TweetId:
return self.raw['tweetId']

@property
def text(self) -> Optional[str]:
def text(self) -> str | None:
# NOTE: likes basically don't have anything except text and url
# ugh. I think none means that tweet was deleted?
res: Optional[str] = self.raw.get('fullText')
res: str | None = self.raw.get('fullText')
if res is None:
return None
res = html.unescape(res)
Expand All @@ -186,7 +188,7 @@ def __init__(self, archive_path: Path) -> None:
if not (self.zpath / 'Your archive.html').exists():
self.old_format = True

def raw(self, what: str, *, fname: Optional[str] = None) -> Iterator[Json]:
def raw(self, what: str, *, fname: str | None = None) -> Iterator[Json]:
logger.info(f'{self.zpath} : processing {what}')

path = fname or what
Expand Down Expand Up @@ -317,4 +319,5 @@ def stats() -> Stats:


## Deprecated stuff
# Legacy alias for TweetId, kept for backwards compatibility.
# NOTE(review): both forms appear because this is a diff view; presumably the
# unguarded assignment is the removed version. In the guarded form the alias is
# bound only when TYPE_CHECKING is false, i.e. at runtime, so static type
# checkers do not see `Tid`.
Tid = TweetId
if not TYPE_CHECKING:
Tid = TweetId

0 comments on commit 82de58d

Please sign in to comment.