Skip to content

Commit

Permalink
Dedupe entries when generating schema tables (#5)
Browse files Browse the repository at this point in the history
A view query like the following references `EmailAddress` twice, but should only generate one table entry for `EmailAddress`:
```
               'emailDomains', (select array_agg(split_part(value, '@', 2))
                    from "EmailAddress" EA where "personId"="Person".id),
               'emailAddresses', (select array_agg(value) from "EmailAddress" EA where "personId"="Person".id),
```
  • Loading branch information
loren authored Dec 11, 2023
1 parent 752171f commit 53ba0d0
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "sinker"
version = "0.1.1"
version = "0.1.2"
description = "Synchronize Postgres to Elasticsearch"
authors = ["Loren Siebert <loren@paradigm.xyz>"]
license = "MIT/Apache-2.0"
Expand Down
8 changes: 6 additions & 2 deletions src/sinker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@

def generate_schema_tables(view_select_query: str) -> Iterable[str]:
    """
    Yield each unique table referenced in a view select query.

    Tables are yielded in the order they were first encountered; a table that
    is referenced more than once in the query is yielded only once.
    Anything that looks like a function call (contains "(") is skipped.

    :param view_select_query: The select query from the view
    :return: an iterator over the distinct table candidates matched by TABLE_RE
    """
    # Remembers what has already been yielded so repeated references to the
    # same table (e.g. two sub-selects from "EmailAddress") produce one entry.
    seen: set = set()
    for table_candidate in TABLE_RE.findall(view_select_query):
        # A "(" means the match is a function call such as unnest(emails),
        # not a table name.
        if "(" in table_candidate:
            continue
        if table_candidate not in seen:
            seen.add(table_candidate)
            yield table_candidate
7 changes: 5 additions & 2 deletions tests/test_generate_schema_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ def test_generate_schema_tables():
view_select_query = """select id,
json_build_object(
'name', "name",
'emailDomains',(select array_agg(split_part(email, '@', 2)) FROM unnest(emails) as email),
'otherEmailDomains',(select array_agg(split_part(email, '@', 2)) FROM unnest(emails) as email),
'emailDomains', (select array_agg(split_part(value, '@', 2))
from "EmailAddress" EA where "personId"="Person".id),
'emailAddresses', (select array_agg(value) from "EmailAddress" EA where "personId"="Person".id),
) as "person"
from "person"
"""
assert list(generate_schema_tables(view_select_query)) == ["person"]
assert list(generate_schema_tables(view_select_query)) == ["EmailAddress", "person"]

0 comments on commit 53ba0d0

Please sign in to comment.