-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: alias read_parquet, read_ndjson, read_csv to their scan counterp…
…art (#2185) Added three new functions: `read_parquet`, `read_ndjson` and `read_csv`. They behave the same as their scan counterparts. Closes: #2169 Co-authored-by: universalmind303 <cory.grinstead@gmail.com>
- Loading branch information
1 parent
035fe55
commit b34edb9
Showing
5 changed files
with
181 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Tests `read_csv` | ||
|
||
# Absolute path | ||
query I | ||
select count(*) from read_csv('file://${PWD}/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv') | ||
---- | ||
102 | ||
|
||
# Absolute path (compressed) | ||
|
||
query I | ||
select count(*) from read_csv('file://${PWD}/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv.gz') | ||
---- | ||
102 | ||
|
||
# Compressed (with function argument) | ||
|
||
query I | ||
select count(*) from read_csv( | ||
'file://${PWD}/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv.gz', | ||
compression => 'gzip' | ||
); | ||
---- | ||
102 | ||
|
||
# To prove this actually picks up the compression from the argument, giving a | ||
# wrong compression type should fail. | ||
statement error stream/file format not recognized | ||
select count(*) from read_csv( | ||
'file://${PWD}/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv.gz', | ||
compression => 'xz' | ||
); | ||
|
||
# Relative path | ||
query I | ||
select count(*) from read_csv('../../testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv') | ||
---- | ||
102 | ||
|
||
# Remote path | ||
query I | ||
select count(*) from read_csv('https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv'); | ||
---- | ||
102 | ||
|
||
# Multiple URLs | ||
|
||
query I | ||
select count(*) from read_csv([ | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv', | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/bikeshare_stations.csv' | ||
]); | ||
---- | ||
204 | ||
|
||
# An empty list of URLs is rejected | ||
statement error at least one url expected | ||
select * from read_csv([]); | ||
|
||
# Glob patterns not supported on HTTP | ||
|
||
statement error Note that globbing is not supported for HTTP. | ||
select * from read_csv( | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/*.csv' | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Tests `read_ndjson` | ||
|
||
# Absolute path | ||
query I | ||
select count(*) from read_ndjson('file://${PWD}/testdata/sqllogictests_datasources_common/data/bikeshare_stations.ndjson') | ||
---- | ||
102 | ||
|
||
# Absolute path (compressed) | ||
query I | ||
select count(*) from read_ndjson('file://${PWD}/testdata/sqllogictests_datasources_common/data/bikeshare_stations.ndjson.gz') | ||
---- | ||
102 | ||
|
||
# Relative path | ||
query I | ||
select count(*) from read_ndjson('../../testdata/sqllogictests_datasources_common/data/bikeshare_stations.ndjson') | ||
---- | ||
102 | ||
|
||
# Remote path | ||
query I | ||
select count(*) from read_ndjson('https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/bikeshare_stations.ndjson'); | ||
---- | ||
102 | ||
|
||
|
||
# Multiple URLs | ||
|
||
query I | ||
select count(*) from read_ndjson([ | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/bikeshare_stations.ndjson', | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/bikeshare_stations.ndjson' | ||
]); | ||
---- | ||
204 | ||
|
||
# An empty list of URLs is rejected | ||
statement error at least one url expected | ||
select * from read_ndjson([]); | ||
|
||
# Glob patterns not supported on HTTP | ||
|
||
# The same glob query is run twice below; each record checks a different | ||
# expected error text (the HTTP status, then the globbing hint). | ||
statement error Unexpected status code '404 Not Found' | ||
select * from read_ndjson( | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/*.ndjson' | ||
); | ||
|
||
statement error Note that globbing is not supported for HTTP. | ||
select * from read_ndjson( | ||
'https://raw.githubusercontent.com/GlareDB/glaredb/main/testdata/sqllogictests_datasources_common/data/*.ndjson' | ||
); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Tests `read_parquet` | ||
|
||
# Absolute path | ||
query I | ||
select count(*) from read_parquet('file://${PWD}/testdata/parquet/userdata1.parquet') | ||
---- | ||
1000 | ||
|
||
# Relative path | ||
query I | ||
select count(*) from read_parquet('../../testdata/parquet/userdata1.parquet') | ||
---- | ||
1000 | ||
|
||
# Remote path | ||
query I | ||
select count(*) from read_parquet('https://github.com/GlareDB/glaredb/raw/main/testdata/parquet/userdata1.parquet'); | ||
---- | ||
1000 | ||
|
||
# Huggingface (percent encoded paths) | ||
# | ||
# Note that this is a pretty big file, but the limit will be pushed down to the | ||
# exec, ensuring we don't need to load the whole thing. | ||
query T | ||
select length(head) > 1 from read_parquet( | ||
'https://huggingface.co/datasets/allenai/soda/resolve/refs%2Fconvert%2Fparquet/default/test/0000.parquet' | ||
) limit 1; | ||
---- | ||
t | ||
|
||
# Multiple URLs | ||
|
||
query I | ||
select count(*) from read_parquet([ | ||
'https://github.com/GlareDB/glaredb/raw/main/testdata/parquet/userdata1.parquet', | ||
'https://github.com/GlareDB/glaredb/raw/main/testdata/parquet/userdata1.parquet' | ||
]); | ||
---- | ||
2000 | ||
|
||
# A local path that cannot be opened reports an error. | ||
# NOTE(review): the extension is spelled `.paruqet` -- presumably deliberate so | ||
# the file is guaranteed not to exist; confirm. | ||
statement error No such file or directory | ||
select * from read_parquet('./testdata/parquet/userdata1.paruqet'); | ||
|
||
# Ambiguous name. | ||
# query I | ||
# select count(*) | ||
# from read_parquet('../../testdata/parquet/userdata1.parquet') p | ||
# inner join (values ('Sweden')) as c(country) on p.country = c.country | ||
# ---- | ||
# 1000 | ||
|
||
# query I | ||
# select count(*) | ||
# from read_parquet('../../testdata/parquet/userdata1.parquet') p | ||
# inner join (select 'Sweden') as c(country) on p.country = c.country | ||
# ---- | ||
# 1000 |