From 77cc8f2b99055195d1bc95d86cb39fd76e07665b Mon Sep 17 00:00:00 2001 From: duonganhthu43 Date: Wed, 3 Apr 2024 11:06:54 +0700 Subject: [PATCH] chore: add description in FieldDefinition for LLM semantics --- dozer-ingestion/aerospike/src/connector.rs | 1 + dozer-ingestion/ethereum/src/log/helper.rs | 13 +++++++++++++ dozer-ingestion/ethereum/src/trace/helper.rs | 8 ++++++++ dozer-ingestion/javascript/src/lib.rs | 1 + dozer-ingestion/kafka/src/debezium/mapper.rs | 6 ++++++ dozer-ingestion/kafka/src/debezium/schema.rs | 3 +++ .../kafka/src/debezium/schema_registry.rs | 1 + .../kafka/src/no_schema_registry_basic.rs | 2 ++ dozer-ingestion/kafka/src/schema_registry_basic.rs | 1 + dozer-ingestion/mongodb/src/lib.rs | 2 ++ dozer-ingestion/mysql/src/connector.rs | 6 ++++++ dozer-ingestion/object-store/src/schema_helper.rs | 1 + dozer-ingestion/oracle/src/connector/mapping.rs | 1 + dozer-ingestion/postgres/src/helper.rs | 1 + dozer-ingestion/postgres/src/schema/sorter.rs | 3 +++ dozer-ingestion/snowflake/src/connection/client.rs | 1 + dozer-ingestion/tests/test_suite/data.rs | 2 ++ dozer-sink-aerospike/src/denorm_dag.rs | 1 + dozer-sink-aerospike/src/lib.rs | 2 ++ dozer-sink-clickhouse/src/metadata.rs | 2 ++ dozer-sink-clickhouse/src/tests.rs | 2 ++ dozer-sink-oracle/src/lib.rs | 7 +++++++ dozer-sql/src/product/join/operator/table.rs | 1 + dozer-sql/src/product/join/processor.rs | 2 ++ dozer-tests/src/sql_tests/helper/schema.rs | 1 + dozer-types/src/arrow_types/from_arrow.rs | 1 + dozer-types/src/types/mod.rs | 2 ++ 27 files changed, 74 insertions(+) diff --git a/dozer-ingestion/aerospike/src/connector.rs b/dozer-ingestion/aerospike/src/connector.rs index ad6b728e43..663c5fdbcb 100644 --- a/dozer-ingestion/aerospike/src/connector.rs +++ b/dozer-ingestion/aerospike/src/connector.rs @@ -525,6 +525,7 @@ impl Connector for AerospikeConnector { }, nullable: name != "PK", source: Default::default(), + description: None, }) .collect(), primary_index, diff --git a/dozer-ingestion/ethereum/src/log/helper.rs b/dozer-ingestion/ethereum/src/log/helper.rs index 9afecb8113..609d49df6c 100644 --- a/dozer-ingestion/ethereum/src/log/helper.rs +++ b/dozer-ingestion/ethereum/src/log/helper.rs @@ -38,6 +38,7 @@ pub fn get_contract_event_schemas( }, nullable: false, source: SourceDefinition::Dynamic, + description: None, }); } @@ -231,72 +232,84 @@ pub fn get_eth_schema() -> Schema { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "address".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "topics".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "data".to_string(), typ: FieldType::Binary, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "block_hash".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "block_number".to_string(), typ: FieldType::UInt, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "transaction_hash".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "transaction_index".to_string(), typ: FieldType::Int, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "log_index".to_string(), typ: FieldType::Int, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "transaction_log_index".to_string(), typ: FieldType::Int, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "log_type".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "removed".to_string(), typ: FieldType::Boolean, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], diff --git a/dozer-ingestion/ethereum/src/trace/helper.rs b/dozer-ingestion/ethereum/src/trace/helper.rs index 1ae76db81d..b69fbf67bd 100644 --- a/dozer-ingestion/ethereum/src/trace/helper.rs +++ b/dozer-ingestion/ethereum/src/trace/helper.rs @@ -93,48 +93,56 @@ pub fn get_trace_schema() -> Schema { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "from".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "to".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "value".to_string(), typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "gas".to_string(), typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "gas_used".to_string(), typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "input".to_string(), typ: FieldType::Text, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "output".to_string(), typ: FieldType::Text, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![], diff --git a/dozer-ingestion/javascript/src/lib.rs b/dozer-ingestion/javascript/src/lib.rs index 5f857b47db..ba9fef603a 100644 --- a/dozer-ingestion/javascript/src/lib.rs +++ b/dozer-ingestion/javascript/src/lib.rs @@ -67,6 +67,7 @@ impl Connector for JavaScriptConnector { typ: FieldType::Json, nullable: false, source: SourceDefinition::Dynamic, + description: None, }], primary_index: vec![], }, diff --git a/dozer-ingestion/kafka/src/debezium/mapper.rs b/dozer-ingestion/kafka/src/debezium/mapper.rs index 1825bcc868..8f496f18a6 100644 --- a/dozer-ingestion/kafka/src/debezium/mapper.rs +++ b/dozer-ingestion/kafka/src/debezium/mapper.rs @@ -317,24 +317,28 @@ mod tests { typ: FieldType::Int, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "name".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "description".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "weight".to_string(), typ: FieldType::Float, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![], @@ -406,12 +410,14 @@ mod tests { typ: FieldType::Int, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "name".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![], diff --git a/dozer-ingestion/kafka/src/debezium/schema.rs b/dozer-ingestion/kafka/src/debezium/schema.rs index 7a801921ab..76ee9bb5a0 100644 --- a/dozer-ingestion/kafka/src/debezium/schema.rs +++ b/dozer-ingestion/kafka/src/debezium/schema.rs @@ -82,6 +82,7 @@ pub fn map_schema( typ, nullable: f.optional.map_or(false, |o| o), source: SourceDefinition::Dynamic, + description: None, }) }) .collect(), @@ -196,12 +197,14 @@ mod tests { typ: FieldType::Int, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "name".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/kafka/src/debezium/schema_registry.rs b/dozer-ingestion/kafka/src/debezium/schema_registry.rs index 9f4f154869..d88644ca46 100644 --- a/dozer-ingestion/kafka/src/debezium/schema_registry.rs +++ b/dozer-ingestion/kafka/src/debezium/schema_registry.rs @@ -133,6 +133,7 @@ impl SchemaRegistry { typ, nullable, source: SourceDefinition::Dynamic, + description: None, }) }) .collect(); diff --git a/dozer-ingestion/kafka/src/no_schema_registry_basic.rs b/dozer-ingestion/kafka/src/no_schema_registry_basic.rs index 890020a4f2..20f93a7b35 100644 --- a/dozer-ingestion/kafka/src/no_schema_registry_basic.rs +++ b/dozer-ingestion/kafka/src/no_schema_registry_basic.rs @@ -16,12 +16,14 @@ impl NoSchemaRegistryBasic { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "message".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/kafka/src/schema_registry_basic.rs b/dozer-ingestion/kafka/src/schema_registry_basic.rs index d07243c549..634ca1f030 100644 --- a/dozer-ingestion/kafka/src/schema_registry_basic.rs +++ b/dozer-ingestion/kafka/src/schema_registry_basic.rs @@ -51,6 +51,7 @@ impl SchemaRegistryBasic { typ, nullable, source: SourceDefinition::Dynamic, + description: None, }) }) .collect(); diff --git a/dozer-ingestion/mongodb/src/lib.rs b/dozer-ingestion/mongodb/src/lib.rs index 1e878c0296..190be41267 100644 --- a/dozer-ingestion/mongodb/src/lib.rs +++ b/dozer-ingestion/mongodb/src/lib.rs @@ -501,12 +501,14 @@ impl Connector for MongodbConnector { typ: FieldType::Json, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "data".to_owned(), typ: FieldType::Json, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/mysql/src/connector.rs b/dozer-ingestion/mysql/src/connector.rs index 44c99decc4..3229335cc7 100644 --- a/dozer-ingestion/mysql/src/connector.rs +++ b/dozer-ingestion/mysql/src/connector.rs @@ -177,6 +177,7 @@ impl Connector for MySQLConnector { typ, nullable, source: SourceDefinition::Dynamic, + description: None, } }, ) @@ -840,18 +841,21 @@ mod tests { typ: FieldType::Int, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "c2".into(), typ: FieldType::Text, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "c3".into(), typ: FieldType::Float, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], @@ -866,12 +870,14 @@ mod tests { typ: FieldType::Int, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "value".into(), typ: FieldType::Json, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/object-store/src/schema_helper.rs b/dozer-ingestion/object-store/src/schema_helper.rs index 56735f80ae..2f18df30cc 100644 --- a/dozer-ingestion/object-store/src/schema_helper.rs +++ b/dozer-ingestion/object-store/src/schema_helper.rs @@ -52,6 +52,7 @@ pub fn map_schema_to_dozer<'a, I: Iterator>>( typ: mapped_field_type, nullable: field.is_nullable(), source: SourceDefinition::Dynamic, + description: None, }) }) .collect() diff --git a/dozer-ingestion/oracle/src/connector/mapping.rs b/dozer-ingestion/oracle/src/connector/mapping.rs index 0e8a545bb9..718dce3349 100644 --- a/dozer-ingestion/oracle/src/connector/mapping.rs +++ b/dozer-ingestion/oracle/src/connector/mapping.rs @@ -215,6 +215,7 @@ pub fn decide_schema( connection: connection.to_string(), name: table_name.clone(), }, + description: None, }), Err(err) => return Err(Error::DataType(err.clone())), } diff --git a/dozer-ingestion/postgres/src/helper.rs b/dozer-ingestion/postgres/src/helper.rs index 4a90722da1..9d2fb3f4c5 100644 --- a/dozer-ingestion/postgres/src/helper.rs +++ b/dozer-ingestion/postgres/src/helper.rs @@ -362,6 +362,7 @@ pub fn convert_column_to_field(column: &Column) -> Result Client<'env> { typ, nullable: *nullable, source: SourceDefinition::Dynamic, + description: None, }); } } diff --git a/dozer-ingestion/tests/test_suite/data.rs b/dozer-ingestion/tests/test_suite/data.rs index d7554777bc..fa3c9d6ceb 100644 --- a/dozer-ingestion/tests/test_suite/data.rs +++ b/dozer-ingestion/tests/test_suite/data.rs @@ -9,12 +9,14 @@ pub fn records_without_primary_key() -> (FieldsAndPk, Vec>) { typ: FieldType::Int, nullable: false, source: Default::default(), + description: None, }, FieldDefinition { name: "uint".to_string(), typ: FieldType::UInt, nullable: false, source: Default::default(), + description: None, }, ]; diff --git a/dozer-sink-aerospike/src/denorm_dag.rs b/dozer-sink-aerospike/src/denorm_dag.rs index 3b41675514..a40d2ba372 100644 --- a/dozer-sink-aerospike/src/denorm_dag.rs +++ b/dozer-sink-aerospike/src/denorm_dag.rs @@ -1243,6 +1243,7 @@ mod tests { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ) diff --git a/dozer-sink-aerospike/src/lib.rs b/dozer-sink-aerospike/src/lib.rs index b48a7ef3e1..c28937932b 100644 --- a/dozer-sink-aerospike/src/lib.rs +++ b/dozer-sink-aerospike/src/lib.rs @@ -589,6 +589,7 @@ mod tests { typ, nullable: false, source: dozer_types::types::SourceDefinition::Dynamic, + description: None, } } @@ -654,6 +655,7 @@ mod tests { typ: FieldType::UInt, nullable: true, source: dozer_types::types::SourceDefinition::Dynamic, + description: None, }, false, ) diff --git a/dozer-sink-clickhouse/src/metadata.rs b/dozer-sink-clickhouse/src/metadata.rs index 4d3b409e02..d5c6075c80 100644 --- a/dozer-sink-clickhouse/src/metadata.rs +++ b/dozer-sink-clickhouse/src/metadata.rs @@ -29,6 +29,7 @@ impl ReplicationMetadata { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ) @@ -38,6 +39,7 @@ impl ReplicationMetadata { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, false, ) diff --git a/dozer-sink-clickhouse/src/tests.rs b/dozer-sink-clickhouse/src/tests.rs index aad238db07..859eb42458 100644 --- a/dozer-sink-clickhouse/src/tests.rs +++ b/dozer-sink-clickhouse/src/tests.rs @@ -32,12 +32,14 @@ fn _get_dozer_schema() -> Schema { typ: FieldType::UInt, nullable: false, source: Default::default(), + description: None, }, FieldDefinition { name: "data".to_string(), typ: FieldType::String, nullable: false, source: Default::default(), + description: None, }, ], primary_index: vec![0], diff --git a/dozer-sink-oracle/src/lib.rs b/dozer-sink-oracle/src/lib.rs index 66970af5ae..71e9594476 100644 --- a/dozer-sink-oracle/src/lib.rs +++ b/dozer-sink-oracle/src/lib.rs @@ -516,6 +516,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::UInt, nullable: true, source: dozer_types::types::SourceDefinition::Dynamic, + description: None, }, false, ); @@ -525,6 +526,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::UInt, nullable: true, source: dozer_types::types::SourceDefinition::Dynamic, + description: None, }, false, ); @@ -558,6 +560,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ) @@ -567,6 +570,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, false, ), @@ -935,6 +939,7 @@ mod tests { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ); @@ -944,6 +949,7 @@ mod tests { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ); @@ -976,6 +982,7 @@ mod tests { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, } } } diff --git a/dozer-sql/src/product/join/operator/table.rs b/dozer-sql/src/product/join/operator/table.rs index 5be3180c37..a16e7094e9 100644 --- a/dozer-sql/src/product/join/operator/table.rs +++ b/dozer-sql/src/product/join/operator/table.rs @@ -205,6 +205,7 @@ mod tests { typ: FieldType::Int, nullable: false, source: Default::default(), + description: None, }], primary_index: vec![0], }; diff --git a/dozer-sql/src/product/join/processor.rs b/dozer-sql/src/product/join/processor.rs index d8e0aa683f..eb187a8464 100644 --- a/dozer-sql/src/product/join/processor.rs +++ b/dozer-sql/src/product/join/processor.rs @@ -147,6 +147,7 @@ mod tests { connection: "test".into(), name: table_name.into(), }, + description: None, }, true, ) @@ -159,6 +160,7 @@ mod tests { connection: "test".into(), name: table_name.into(), }, + description: None, }, false, ); diff --git a/dozer-tests/src/sql_tests/helper/schema.rs b/dozer-tests/src/sql_tests/helper/schema.rs index ddca12474b..87d640d938 100644 --- a/dozer-tests/src/sql_tests/helper/schema.rs +++ b/dozer-tests/src/sql_tests/helper/schema.rs @@ -28,6 +28,7 @@ pub fn get_schema(columns: &[rusqlite::Column]) -> Schema { }, nullable: true, source: SourceDefinition::Dynamic, + description: None, } }) .collect(); diff --git a/dozer-types/src/arrow_types/from_arrow.rs b/dozer-types/src/arrow_types/from_arrow.rs index 6117d4fd5e..b76943a5a9 100644 --- a/dozer-types/src/arrow_types/from_arrow.rs +++ b/dozer-types/src/arrow_types/from_arrow.rs @@ -189,6 +189,7 @@ fn handle_with_dozer_schema( typ, nullable: field.is_nullable(), source: SourceDefinition::Dynamic, + description: None, }); } diff --git a/dozer-types/src/types/mod.rs b/dozer-types/src/types/mod.rs index 507d226f03..31d111d289 100644 --- a/dozer-types/src/types/mod.rs +++ b/dozer-types/src/types/mod.rs @@ -53,6 +53,7 @@ pub struct FieldDefinition { pub nullable: bool, #[serde(default)] pub source: SourceDefinition, + pub description: Option, } impl FieldDefinition { @@ -62,6 +63,7 @@ impl FieldDefinition { typ, nullable, source, + description: None, } }