-
Notifications
You must be signed in to change notification settings - Fork 433
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(python): add capability to read unity catalog (uc://) uris #3113
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -266,14 +266,14 @@ pub struct TableSummary { | |
pub struct Table { | ||
/// Username of table creator. | ||
#[serde(default)] | ||
pub created_by: String, | ||
pub created_by: Option<String>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are instances I encountered where |
||
|
||
/// Name of table, relative to parent schema. | ||
pub name: String, | ||
|
||
/// Username of user who last modified the table. | ||
#[serde(default)] | ||
pub updated_by: String, | ||
pub updated_by: Option<String>, | ||
|
||
/// List of schemes whose objects can be referenced without qualification. | ||
#[serde(default)] | ||
|
@@ -283,6 +283,7 @@ pub struct Table { | |
pub data_source_format: DataSourceFormat, | ||
|
||
/// Full name of table, in form of catalog_name.schema_name.table_name | ||
#[serde(default)] | ||
pub full_name: String, | ||
|
||
/// Name of parent schema relative to its parent catalog. | ||
|
@@ -292,6 +293,7 @@ pub struct Table { | |
pub storage_location: String, | ||
|
||
/// Unique identifier of parent metastore. | ||
#[serde(default)] | ||
pub metastore_id: String, | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,6 +78,7 @@ use crate::merge::PyMergeBuilder; | |
use crate::query::PyQueryBuilder; | ||
use crate::schema::{schema_to_pyobject, Field}; | ||
use crate::utils::rt; | ||
use deltalake_catalog_unity::UnityCatalogBuilder; | ||
|
||
#[derive(FromPyObject)] | ||
enum PartitionFilterValue { | ||
|
@@ -171,12 +172,24 @@ impl RawDeltaTable { | |
log_buffer_size: Option<usize>, | ||
) -> PyResult<Self> { | ||
py.allow_threads(|| { | ||
let mut builder = deltalake::DeltaTableBuilder::from_uri(table_uri) | ||
.with_io_runtime(IORuntime::default()); | ||
let options = storage_options.clone().unwrap_or_default(); | ||
if let Some(storage_options) = storage_options { | ||
builder = builder.with_storage_options(storage_options) | ||
let (table_path, uc_token) = if UnityCatalogBuilder::is_unity_catalog_uri(table_uri) { | ||
match rt().block_on(UnityCatalogBuilder::get_uc_location_and_token(table_uri)) { | ||
Ok(tup) => tup, | ||
Err(err) => return Err(PyRuntimeError::new_err(err.to_string())), | ||
} | ||
} else { | ||
(table_uri.to_string(), "".to_string()) | ||
}; | ||
|
||
let mut options = storage_options.clone().unwrap_or_default(); | ||
if !uc_token.is_empty() { | ||
options.insert("UNITY_CATALOG_TEMPORARY_TOKEN".to_string(), uc_token); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have added this key
Comment on lines
+175
to
+186
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this would be a better fit for code inside an object store factory There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yep will have to wait for #3078 to get merged before this one, the temp credentials part is in there :)
Yes, this code is currently duplicated in |
||
} | ||
|
||
let mut builder = deltalake::DeltaTableBuilder::from_uri(&table_path) | ||
.with_io_runtime(IORuntime::default()); | ||
builder = builder.with_storage_options(options.clone()); | ||
|
||
if let Some(version) = version { | ||
builder = builder.with_version(version) | ||
} | ||
|
@@ -193,7 +206,7 @@ impl RawDeltaTable { | |
Ok(RawDeltaTable { | ||
_table: Arc::new(Mutex::new(table)), | ||
_config: FsConfig { | ||
root_url: table_uri.into(), | ||
root_url: table_path, | ||
options, | ||
}, | ||
}) | ||
|
@@ -206,10 +219,23 @@ impl RawDeltaTable { | |
table_uri: &str, | ||
storage_options: Option<HashMap<String, String>>, | ||
) -> PyResult<bool> { | ||
let mut builder = deltalake::DeltaTableBuilder::from_uri(table_uri); | ||
if let Some(storage_options) = storage_options { | ||
builder = builder.with_storage_options(storage_options) | ||
let (table_path, uc_token) = if UnityCatalogBuilder::is_unity_catalog_uri(table_uri) { | ||
match rt().block_on(UnityCatalogBuilder::get_uc_location_and_token(table_uri)) { | ||
Ok(tup) => tup, | ||
Err(err) => return Err(PyRuntimeError::new_err(err.to_string())), | ||
} | ||
} else { | ||
(table_uri.to_string(), "".to_string()) | ||
}; | ||
|
||
let mut options = storage_options.clone().unwrap_or_default(); | ||
if !uc_token.is_empty() { | ||
options.insert("UNITY_CATALOG_TEMPORARY_TOKEN".to_string(), uc_token); | ||
} | ||
|
||
let mut builder = deltalake::DeltaTableBuilder::from_uri(&table_path) | ||
.with_io_runtime(IORuntime::default()); | ||
builder = builder.with_storage_options(options.clone()); | ||
Ok(rt() | ||
.block_on(async { | ||
match builder.build() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This allows users to work with a local (non-https) Unity Catalog REST API with
delta-rs
.