Skip to content

Commit

Permalink
feat: add lazy loading of tables
Browse files Browse the repository at this point in the history
  • Loading branch information
PengLiVectra committed Nov 16, 2023
1 parent 5c324cc commit b59a982
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
15 changes: 14 additions & 1 deletion python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def __init__(
storage_options: Optional[Dict[str, str]] = None,
without_files: bool = False,
log_buffer_size: Optional[int] = None,
lazy_load: bool = False
):
"""
Create the Delta Table from a path with an optional version.
Expand All @@ -247,9 +248,19 @@ def __init__(
This can decrease latency if there are many files in the log since the last checkpoint,
but will also increase memory usage. Possible rate limits of the storage backend should
also be considered for optimal performance. Defaults to 4 * number of cpus.
lazy_load: when True, the table is created without loading its metadata up front; the metadata is loaded on the first call to `metadata()`.
"""
self._storage_options = storage_options
if lazy_load:
self._table = RawDeltaTable.load_lazy(
str(table_uri),
version=version,
storage_options=storage_options,
without_files=without_files,
log_buffer_size=log_buffer_size,
)
self._metadata = None
return
self._table = RawDeltaTable(
str(table_uri),
version=version,
Expand Down Expand Up @@ -425,6 +436,8 @@ def metadata(self) -> Metadata:
Returns:
the current Metadata registered in the transaction log
"""
if not self._metadata:
self._metadata = Metadata(self._table)
return self._metadata

def protocol(self) -> ProtocolVersions:
Expand Down
31 changes: 31 additions & 0 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,37 @@ impl RawDeltaTable {
})
}

/// Create a `RawDeltaTable` through `DeltaTableBuilder::build()`.
///
/// This is the constructor the Python `DeltaTable(..., lazy_load=True)` path calls.
///
/// # Arguments
/// * `table_uri` - location of the table; also stored as `root_url` in the `FsConfig`.
/// * `version` - optional table version forwarded to the builder.
/// * `storage_options` - optional backend options; forwarded to the builder and
///   kept (or an empty map when `None`) in the `FsConfig`.
/// * `without_files` - when `true`, the builder is configured with `without_files()`.
/// * `log_buffer_size` - optional log buffer size forwarded to the builder. The
///   Python caller always passes this keyword, so it must be part of the signature.
///
/// # Errors
/// Any error reported by the builder is converted through `PythonError`.
#[classmethod]
#[pyo3(signature = (table_uri, version = None, storage_options = None, without_files = false, log_buffer_size = None))]
fn load_lazy(
    _cls: &PyType,
    table_uri: &str,
    version: Option<i64>,
    storage_options: Option<HashMap<String, String>>,
    without_files: bool,
    log_buffer_size: Option<usize>,
) -> PyResult<Self> {
    let mut builder = deltalake::DeltaTableBuilder::from_uri(table_uri);
    // Keep an owned copy for `FsConfig`; the builder consumes the original map.
    let options = storage_options.clone().unwrap_or_default();
    if let Some(storage_options) = storage_options {
        builder = builder.with_storage_options(storage_options);
    }
    if let Some(version) = version {
        builder = builder.with_version(version);
    }
    if without_files {
        builder = builder.without_files();
    }
    if let Some(buf_size) = log_buffer_size {
        // The builder validates the buffer size, hence the fallible call.
        builder = builder
            .with_log_buffer_size(buf_size)
            .map_err(PythonError::from)?;
    }
    let table = builder.build().map_err(PythonError::from)?;

    Ok(RawDeltaTable {
        _table: table,
        _config: FsConfig {
            root_url: table_uri.into(),
            options,
        },
    })
}

#[classmethod]
#[pyo3(signature = (data_catalog, database_name, table_name, data_catalog_id, catalog_options = None))]
fn get_table_uri_from_data_catalog(
Expand Down

0 comments on commit b59a982

Please sign in to comment.