Skip to content

Commit

Permalink
add partitioning test
Browse files Browse the repository at this point in the history
  • Loading branch information
ncclementi committed Aug 17, 2021
1 parent 774e79b commit 7bdd66a
Showing 1 changed file with 35 additions and 1 deletion.
36 changes: 35 additions & 1 deletion dask_bigquery/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import random

import pandas as pd
import pytest
from distributed.utils_test import cluster_fixture # noqa: F401
from distributed.utils_test import client, loop # noqa: F401
from google.cloud import bigquery
Expand Down Expand Up @@ -38,6 +39,7 @@ def push_data():
return df


# test simple read
def test_read_gbq(client):
"""Test simple read of data pushed to BigQuery using pandas-gbq"""
try:
Expand All @@ -62,5 +64,37 @@ def test_read_gbq(client):
assert ddf.npartitions == 2

ddf_comp = ddf.set_index("idx").compute()
# breakpoint()
assert all(ddf_comp == df.set_index("idx"))


# test partitioned data: this test requires a copy of the public dataset
# bigquery-public-data.covid19_public_forecasts.county_14d into the
# project dask-bigquery


@pytest.mark.parametrize(
    "fields",
    ([], ["county_name"], ["county_name", "county_fips_code"]),
    ids=["no_fields", "missing_partition_field", "fields"],
)
def test_read_gbq_partitioning(fields, client):
    """Read a partitioned BigQuery table and verify partition/column handling.

    Covers three field selections: none (full schema), a selection missing
    the partition field, and a selection that already includes it.
    """
    requested_partitions = ["Teton", "Loudoun"]
    ddf = read_gbq(
        project_id="dask-bigquery",
        dataset_id="covid19_public_forecasts",
        table_id="county_14d",
        partition_field="county_name",
        partitions=requested_partitions,
        fields=fields,
    )

    # A non-empty frame means the partition filter matched real rows.
    assert len(ddf)
    loaded = set(ddf.columns) | {ddf.index.name}

    if fields:
        # The partition field is always loaded, even if absent from `fields`.
        assert loaded == set(fields) | {"county_name"}
    else:
        # No selection given: the full schema should come back.
        assert loaded >= {"county_name", "county_fips_code"}

    # One dask partition per requested BigQuery partition, ordered divisions.
    assert ddf.npartitions == len(requested_partitions)
    assert list(ddf.divisions) == sorted(ddf.divisions)

0 comments on commit 7bdd66a

Please sign in to comment.