Skip to content

Commit

Permalink
refactor: general cleanup and bugfixes (#36)
Browse files Browse the repository at this point in the history
- fix an error when empty results are returned from a query
- improve performance of .exists() calls
- avoid unnecessary calls to table.get() when loading data
- clean up printing of objects
  • Loading branch information
imathews authored Jan 11, 2024
1 parent 3add5b7 commit 49dff0b
Show file tree
Hide file tree
Showing 12 changed files with 191 additions and 283 deletions.
2 changes: 1 addition & 1 deletion src/redivis/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.12.6"
__version__ = "0.13.0"
9 changes: 3 additions & 6 deletions src/redivis/classes/Base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@ def __getitem__(self, key):
self.properties[key] if hasattr(self, 'properties') and self.properties is not None and key in self.properties else None
)

def __str__(self):
properties = self.properties if hasattr(self, 'properties') and self.properties is not None else {}
return json.dumps(properties, indent=2)

def __repr__(self) -> str:
field_strings = []
for key, field in vars(self).items():
field_strings.append(f'{key}={field!r}')
if key != 'properties' and key != 'scoped_reference' and field is not None and not isinstance(field, Base):
field_strings.append(f'{key}:{field!r}')

return f"<{self.__class__.__name__}({','.join(field_strings)})>"
return f"<{self.__class__.__name__} {' '.join(field_strings)}>"
54 changes: 27 additions & 27 deletions src/redivis/classes/Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,27 @@ def __init__(
version="current", # TODO: should be version_tag, version would reference a version class (?) dataset().version("next").create()? .version("next").release()?
user=None,
organization=None,
properties={},
properties=None,
):
self.name = name
self.version = version
self.user = user
self.organization = organization
self.identifier = (

self.qualified_reference = properties["qualifiedReference"] if "qualifiedReference" in (properties or {}) else (
f"{(self.organization or self.user).name}.{self.name}:{self.version}"
)
self.uri = f"/datasets/{quote_uri(self.identifier, '')}"
self.properties = {
**{
"kind": 'dataset',
"name": name,
"uri": self.uri
},
**properties
}


self.scoped_reference = properties["scopedReference"] if "scopedReference" in (properties or {}) else f"{self.name}:{self.version}"
self.uri = f"/datasets/{quote_uri(self.qualified_reference, '')}"
self.properties = properties

def create(self, *, public_access_level="none", description=None):
if self.organization:
path = f"/organizations/{self.organization.name}/datasets"
else:
path = f"/users/{self.user.name}/datasets"

self.properties = make_request(
properties = make_request(
method="POST",
path=path,
payload={
Expand All @@ -50,10 +43,11 @@ def create(self, *, public_access_level="none", description=None):
"description": description,
},
)
update_properties(self, properties)
return self

def create_next_version(self, *, if_not_exists=False):
if not self.properties or not hasattr(self.properties, "nextVersion"):
if not self.properties or "nextVersion" not in self.properties:
self.get()

if not self.properties["nextVersion"]:
Expand Down Expand Up @@ -81,16 +75,16 @@ def delete(self):

def exists(self):
try:
make_request(method="GET", path=self.uri)
make_request(method="HEAD", path=self.uri)
return True
except Exception as err:
if err.args[0]["status"] != 404:
raise err
return False

def get(self):
self.properties = make_request(method="GET", path=self.uri)
self.uri = self.properties["uri"]
properties = make_request(method="GET", path=self.uri)
update_properties(self, properties)
return self

def list_tables(self, max_results=None):
Expand All @@ -102,19 +96,16 @@ def list_tables(self, max_results=None):
]

def query(self, query):
return Query(query, default_dataset=self.identifier)
return Query(query, default_dataset=self.qualified_reference)

def release(self):
res = make_request(
version_res = make_request(
method="POST",
path=f"{self.uri}/versions/next/release",
)
self.version = f'v{res["tag"]}'
self.identifier = (
f"{(self.organization or self.user).name}.{self.name}:{self.version}"
)
self.uri = f"/datasets/{quote_uri(self.identifier, '')}"
return self.get()
self.uri = version_res["datasetUri"]
self.get()
return self

def table(self, name, *, sample=False):
    """Return a ``Table`` object for the table called ``name`` in this dataset.

    Args:
        name: Name of the table, passed to ``Table`` unchanged.
        sample: Keyword-only flag forwarded to ``Table`` as ``sample``.

    Returns:
        A ``Table`` constructed with this dataset as its ``dataset`` argument.
    """
    return Table(name, dataset=self, sample=sample)
Expand All @@ -128,9 +119,18 @@ def update(self, *, name=None, public_access_level=None, description=None):
if description is not None:
payload["description"] = description

self.properties = make_request(
res = make_request(
method="PATCH",
path=self.uri,
payload=payload,
)
update_properties(self, res)
return self

def update_properties(instance, properties):
    """Copy the fields of a dataset API response onto ``instance``.

    Args:
        instance: Object to update in place; the attributes ``properties``,
            ``qualified_reference``, ``scoped_reference``, ``name``, ``uri``
            and ``version`` are (re)assigned on it.
        properties: Mapping holding the keys ``"qualifiedReference"``,
            ``"scopedReference"``, ``"name"``, ``"uri"`` and ``"version"``,
            where ``"version"`` is itself a mapping with a ``"tag"`` key.

    Raises:
        KeyError: If any of the required keys is missing. Nothing is assigned
            in that case.
    """
    # Read every required value before assigning anything, so that a malformed
    # payload cannot leave the instance half-updated.
    qualified_reference = properties["qualifiedReference"]
    scoped_reference = properties["scopedReference"]
    name = properties["name"]
    uri = properties["uri"]
    version_tag = properties["version"]["tag"]

    instance.properties = properties
    instance.qualified_reference = qualified_reference
    instance.scoped_reference = scoped_reference
    instance.name = name
    instance.uri = uri
    instance.version = version_tag
39 changes: 21 additions & 18 deletions src/redivis/classes/Project.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,50 @@


class Project(Base):
def __init__(self, name, *, user, properties={}):
def __init__(self, name, *, user, properties=None):
self.user = user
self.name = name
self.identifier = f"{self.user.name}.{self.name}"
self.uri = f"/projects/{quote_uri(self.identifier, '')}"
self.properties = {
**{
"kind": "project",
"name": name,
"uri": self.uri
},
**properties
}

def list_tables(self, *, max_results=None, include_dataset_tables=False):

self.qualified_reference = properties["qualifiedReference"] if "qualifiedReference" in (properties or {}) else (
f"{self.user.name}.{self.name}"
)
self.scoped_reference = properties["scopedReference"] if "scopedReference" in (properties or {}) else f"{self.name}"
self.uri = f"/projects/{quote_uri(self.qualified_reference, '')}"
self.properties = properties

def list_tables(self, *, max_results=None):
tables = make_paginated_request(
path=f"{self.uri}/tables",
page_size=100,
max_results=max_results,
query={"includeDatasetTables": include_dataset_tables},
)
return [
Table(table["name"], project=self, properties=table) for table in tables
]

def exists(self):
try:
make_request(method="GET", path=self.uri)
make_request(method="HEAD", path=self.uri)
return True
except Exception as err:
if err.args[0]["status"] != 404:
raise err
return False

def get(self):
self.properties = make_request(method="GET", path=self.uri)
self.uri = self.properties["uri"]
properties = make_request(method="GET", path=self.uri)
update_properties(self, properties)
return self

def query(self, query):
return Query(query, default_project=self.identifier)
return Query(query, default_project=self.qualified_reference)

def table(self, name):
    """Return a ``Table`` object for the table called ``name`` in this project.

    Args:
        name: Name of the table, passed to ``Table`` unchanged.

    Returns:
        A ``Table`` constructed with this project as its ``project`` argument.
    """
    return Table(name, project=self)

def update_properties(instance, properties):
    """Copy the fields of a project API response onto ``instance``.

    Args:
        instance: Object to update in place; the attributes ``properties``,
            ``qualified_reference``, ``scoped_reference``, ``name`` and
            ``uri`` are (re)assigned on it.
        properties: Mapping holding the keys ``"qualifiedReference"``,
            ``"scopedReference"``, ``"name"`` and ``"uri"``.

    Raises:
        KeyError: If any of the required keys is missing. Nothing is assigned
            in that case.
    """
    # Read every required value before assigning anything, so that a malformed
    # payload cannot leave the instance half-updated.
    qualified_reference = properties["qualifiedReference"]
    scoped_reference = properties["scopedReference"]
    name = properties["name"]
    uri = properties["uri"]

    instance.properties = properties
    instance.qualified_reference = qualified_reference
    instance.scoped_reference = scoped_reference
    instance.name = name
    instance.uri = uri
30 changes: 12 additions & 18 deletions src/redivis/classes/Query.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(
"defaultDataset": default_dataset if default_dataset else None,
},
)
self.uri = f"/queries/{self.properties['id']}"
self.uri = self.properties["uri"]

def get(self):
self.properties = make_request(method="GET", path=self.uri)
Expand All @@ -44,8 +44,7 @@ def to_arrow_dataset(self, max_results=None, *, progress=True, batch_preprocesso

return list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(
self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="arrow_dataset",
progress=progress,
Expand All @@ -58,8 +57,7 @@ def to_arrow_table(self, max_results=None, *, progress=True, batch_preprocessor=

return list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(
self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="arrow_table",
progress=progress,
Expand All @@ -72,8 +70,7 @@ def to_polars_lazyframe(self, max_results=None, *, progress=True, batch_preproce

return list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(
self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="polars_lazyframe",
progress=progress,
Expand All @@ -86,8 +83,7 @@ def to_dask_dataframe(self, max_results=None, *, progress=True, batch_preprocess

return list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(
self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="dask_dataframe",
progress=progress,
Expand All @@ -101,9 +97,10 @@ def to_pandas_dataframe(self, max_results=None, *, geography_variable="", progre

self._wait_for_finish()

print()
arrow_table = list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="arrow_table",
progress=progress,
Expand Down Expand Up @@ -143,7 +140,7 @@ def to_geopandas_dataframe(self, max_results=None, *, geography_variable="", pro

arrow_table = list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="arrow_table",
progress=progress,
Expand Down Expand Up @@ -179,8 +176,7 @@ def to_dataframe(self, max_results=None, *, geography_variable="", progress=True

arrow_table = list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(
self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="arrow_table",
progress=progress,
Expand All @@ -205,13 +201,12 @@ def to_arrow_batch_iterator(self, max_results=None, *, variables=None, progress=

return list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(self.properties["outputNumRows"])),
max_results=max_results,
selected_variables=variables,
mapped_variables=self.properties["outputSchema"],
output_type="arrow_iterator",
progress=progress,
coerce_schema=hasattr(self.properties, "container") is False or self.properties["container"][
"kind"] == 'dataset'
coerce_schema=False
)

def list_rows(self, max_results=None, *, progress=True):
Expand All @@ -223,8 +218,7 @@ def list_rows(self, max_results=None, *, progress=True):

return list_rows(
uri=self.uri,
max_results=self.properties["outputNumRows"] if max_results is None else min(max_results, int(
self.properties["outputNumRows"])),
max_results=max_results,
mapped_variables=self.properties["outputSchema"],
output_type="tuple",
progress=progress,
Expand Down
Loading

0 comments on commit 49dff0b

Please sign in to comment.