Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

refactor: switch json support to use new read_ndjson_auto function #14

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions datasette_parquet/ddl.py
Original file line number Diff line number Diff line change
@@ -2,23 +2,14 @@
import json
from pathlib import Path

def sniff_json_columns(fname):
with open(fname) as f:
line = f.readline()
obj = json.loads(line)

return ', '.join(["json->>'{}' AS \"{}\"".format(x, x) for x in obj.keys()])

def view_for(view_name, fname, glob):
view_name = view_name.replace('.', '_')
if fname.endswith('.csv') or fname.endswith('.tsv'):
if fname.endswith(('.csv', '.tsv')):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL!

return "CREATE VIEW \"{}\" AS SELECT * FROM read_csv_auto('{}', header=true)".format(view_name, glob)
elif fname.endswith('.parquet'):
return "CREATE VIEW \"{}\" AS SELECT * FROM '{}'".format(view_name, glob)
elif fname.endswith('.ndjson') or fname.endswith('.jsonl'):
# We need to sniff the first row of the file in order to build a good view
columns = sniff_json_columns(fname)
return "CREATE VIEW \"{}\" AS SELECT {} FROM read_json_objects('{}')".format(view_name, columns, glob)
elif fname.endswith(('.ndjson', '.jsonl')):
return "CREATE VIEW \"{}\" AS SELECT * FROM read_ndjson_auto('{}')".format(view_name, glob)

def create_views(dirname):
rv = []
Expand Down