Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

whatsapp.android: expose all entities extracted from the db #363

Merged
merged 1 commit into from
Dec 29, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions my/whatsapp/android.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from datetime import datetime, timezone
from pathlib import Path
import sqlite3
from typing import Sequence, Iterator, Optional
from typing import Union, Sequence, Iterator, Optional

from my.core import get_files, Paths, datetime_aware, Res, make_logger, make_config
from my.core.common import unique_everseen
Expand Down Expand Up @@ -56,7 +56,10 @@ class Message:
text: Optional[str]


def _process_db(db: sqlite3.Connection):
Entity = Union[Chat, Sender, Message]


def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
# TODO later, split out Chat/Sender objects separately to save on object creation, similar to other android data sources

chats = {}
Expand All @@ -73,6 +76,7 @@ def _process_db(db: sqlite3.Connection):
id=chat_id,
name=subject,
)
yield chat
chats[chat.id] = chat

senders = {}
Expand All @@ -88,6 +92,7 @@ def _process_db(db: sqlite3.Connection):
id=r['raw_string'],
name=None,
)
yield s
senders[r['_id']] = s

# NOTE: hmm, seems that message_view or available_message_view use lots of NULL as ...
Expand Down Expand Up @@ -187,7 +192,7 @@ def _process_db(db: sqlite3.Connection):
yield m


def _messages() -> Iterator[Res[Message]]:
def _entities() -> Iterator[Res[Entity]]:
paths = inputs()
total = len(paths)
width = len(str(total))
Expand All @@ -200,5 +205,14 @@ def _messages() -> Iterator[Res[Message]]:
yield echain(RuntimeError(f'While processing {path}'), cause=e)


def entities() -> Iterator[Res[Entity]]:
return unique_everseen(_entities)


def messages() -> Iterator[Res[Message]]:
yield from unique_everseen(_messages)
# TODO hmm, specify key=lambda m: m.id?
# not sure since might be useful to keep track of sender changes etc
# probably best not to, or maybe query messages/senders separately and merge later?
for e in entities():
if isinstance(e, (Exception, Message)):
yield e