diff --git a/docsite/docs/protocols/delta-sharing-protocol.md b/docsite/docs/protocols/delta-sharing-protocol.md
index 35a6fb5d4..e73ec48f1 100644
--- a/docsite/docs/protocols/delta-sharing-protocol.md
+++ b/docsite/docs/protocols/delta-sharing-protocol.md
@@ -1376,12 +1376,39 @@ delta-table-version: 123
This is the API for clients to query the table schema and other metadata.
-HTTP Request | Value
--|-
-Method | `GET`
-Header | `Authorization: Bearer [token]`
-URL | `{prefix}/shares/{share}/schemas/{schema}/tables/{table}/metadata`
-URL Parameters | **\{share\}**: The share name to query. It's case-insensitive.
**\{schema\}**: The schema name to query. It's case-insensitive.
**\{table\}**: The table name to query. It's case-insensitive.
+
+
+HTTP Request |
+Value |
+
+
+Method |
+`GET` |
+
+
+Headers |
+
+`Authorization: Bearer {token}`
+
+Optional: `delta-sharing-capabilities: responseformat=delta;readerfeatures=deletionvectors`, see
+[Delta Sharing Capabilities Header](#delta-sharing-capabilities-header) for details.
+ |
+
+
+URL |
+
+`{prefix}/shares/{share}/schemas/{schema}/tables/{table}/metadata`
+ |
+
+
+URL Parameters |
+
+**\{share\}**: The share name to query. It's case-insensitive.
+**\{schema\}**: The schema name to query. It's case-insensitive.
+**\{table\}**: The table name to query. It's case-insensitive.
+ |
+
+
200: The table metadata was successfully returned.
@@ -1409,10 +1436,17 @@ URL Parameters | **\{share\}**: The share name to query. It's case-insensitive.<
A sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format](#api-response-format).
+When `responseformat=parquet`, each line is a JSON object defined in [API Response Format in Parquet](#api-response-format-in-parquet).
+
The response contains two lines:
- The first line is [a JSON wrapper object](#json-wrapper-object-in-each-line) containing the table [Protocol](#protocol) object.
- The second line is [a JSON wrapper object](#json-wrapper-object-in-each-line) containing the table [Metadata](#metadata) object.
+When `responseformat=delta`, each line is a Json object defined in [API Response Format in Delta](#api-response-format-in-delta).
+The response contains two lines:
+- The first line is [a JSON wrapper object](#json-wrapper-object-in-each-line-in-delta) containing the delta [Protocol](#protocol-in-delta-format) object.
+- The second line is [a JSON wrapper object](#json-wrapper-object-in-each-line-in-delta) containing the delta [Metadata](#metadata-in-delta-format) object.
+
@@ -1572,7 +1606,7 @@ The response contains two lines:
-Example (See [API Response Format](#api-response-format) for more details about the format):
+Example (See [API Response Format in Parquet](#api-response-format-in-parquet) for more details about the format):
`GET {prefix}/shares/vaccine_share/schemas/acme_vaccine_data/tables/vaccine_patients/metadata`
@@ -1627,6 +1661,10 @@ This is the API for clients to read data from a table.
Optional: `Content-Type: application/json; charset=utf-8`
+Optional: `delta-sharing-capabilities: responseformat=delta;readerfeatures=deletionvectors`, see
+[Delta Sharing Capabilities Header](#delta-sharing-capabilities-header) for details.
+
+
@@ -1697,7 +1735,8 @@ returned in the response.
Body |
-A sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format](#api-response-format).
+When `responseformat=parquet`, a sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format in Parquet](#api-response-format-in-parquet).
+
The response contains multiple lines:
- The first line is [a JSON wrapper object](#json-wrapper-object-in-each-line) containing the table [Protocol](#protocol) object.
@@ -1707,6 +1746,18 @@ The response contains multiple lines:
- The lines are [files](#file) in the table (otherwise).
- The ordering of the lines doesn't matter.
+When `responseformat=delta`, a sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format in Delta](#api-response-format-in-delta).
+
+The response contains multiple lines:
+- The first line is [a JSON wrapper object](#json-wrapper-object-in-each-line-in-delta) containing the delta [Protocol](#protocol-in-delta-format) object.
+- The second line is [a JSON wrapper object](#json-wrapper-object-in-each-line-in-delta) containing the delta [Metadata](#metadata-in-delta-format) object.
+- The rest of the lines are [JSON wrapper objects](#json-wrapper-object-in-each-line-in-delta) for [Metadata](#metadata-in-delta-format), or [files](#file-in-delta-format).
+ - The lines are [files](#file-in-delta-format) which wraps the delta single action in the table (otherwise), with possible historical [Metadata](#metadata-in-delta-format) (when startingVersion is set).
+ - The ordering of the lines doesn't matter.
+
+The delta actions are wrapped because they will be used to construct a local delta log on the recipient
+side and then leverage the delta library to read data.
+
|
@@ -1899,7 +1950,7 @@ The request body should be a JSON string containing the following optional field
When `predicateHints` and `limitHint` are both present, the server should apply `predicateHints` first then `limitHint`. As these two parameters are hints rather than enforcement, the client must always apply `predicateHints` and `limitHint` on the response returned by the server if it wishes to filter and limit the returned data. An empty JSON object (`{}`) should be provided when these two parameters are missing.
-Example (See [API Response Format](#api-response-format) for more details about the format):
+Example (See [API Response Format in Parquet](#api-response-format-in-parquet) for more details about the format):
`POST {prefix}/shares/vaccine_share/schemas/acme_vaccine_data/tables/vaccine_patients/query`
@@ -1993,6 +2044,8 @@ The change data feed represents row-level changes between versions of a Delta ta
`Authorization: Bearer [token]`
+Optional: `delta-sharing-capabilities: responseformat=delta;readerfeatures=deletionvectors`, see [Delta Sharing Capabilities Header](#delta-sharing-capabilities-header) for details.
+
@@ -2048,7 +2101,7 @@ The change data feed represents row-level changes between versions of a Delta ta
Body |
-A sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format](#api-response-format).
+When `responseformat=parquet`, a sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format in Parquet](#api-response-format-in-parquet).
The response contains multiple lines:
- The first line is [a JSON wrapper object](#json-wrapper-object-in-each-line) containing the table [Protocol](#protocol) object.
@@ -2057,6 +2110,13 @@ The response contains multiple lines:
- Historical [Metadata](#metadata) will be returned if includeHistoricalMetadata is set to true.
- The ordering of the lines doesn't matter.
+When `responseformat=delta`, a sequence of JSON strings delimited by newline. Each line is a JSON object defined in [API Response Format in Parquet](#api-response-format-in-delta).
+- The first line is [a JSON wrapper object](#json-wrapper-object-in-each-line-in-delta) containing the delta [Protocol](#protocol-in-delta-format) object.
+- The second line is [a JSON wrapper object](#json-wrapper-object-in-each-line-in-delta) containing the delta [Metadata](#metadata-in-delta-format) object.
+- The rest of the lines are [JSON wrapper objects](#json-wrapper-object-in-each-line) for [Files](#file-in-delta-format) of the change data feed.
+ - Historical [Metadata](#metadata) will be returned if includeHistoricalMetadata is set to true.
+ - The ordering of the lines doesn't matter.
+
|
@@ -2216,7 +2276,7 @@ The response contains multiple lines:
-Example (See [API Response Format](#api-response-format) for more details about the format):
+Example (See [API Response Format in Parquet](#api-response-format-in-parquet) for more details about the format):
`GET {prefix}/shares/vaccine_share/schemas/acme_vaccine_data/tables/vaccine_patients/changes?startingVersion=0&endingVersion=2`
@@ -2289,7 +2349,61 @@ content-type: application/x-ndjson; charset=utf-8
### Timestamp Format
Accepted timestamp format by a delta sharing server: in the ISO8601 format, in the UTC timezone, such as `2022-01-01T00:00:00Z`.
-## API Response Format
+## Delta Sharing Capabilities Header
+
+This section explains the details of delta sharing capabilities header, which was introduced to help
+delta sharing catch up with features in [delta protocol](https://github.com/delta-io/delta/blob/master/PROTOCOL.md).
+
+The key of the header is **delta-sharing-capabilities**, the value is semicolon separated capabilities.
+Each capability is in the format of "key=value1,value2", values are separated by commas.
+Example: "responseformat=delta;readerfeatures=deletionvectors,columnmapping". All keys and values should
+be case-insensitive when processed by the server.
+
+This header can be used in the request for [Query Table Metadata](#query-table-metadata),
+[Query Table](#read-data-from-a-table), and [Query Table Changes](#read-change-data-feed-from-a-table).
+
+**Compatibility**
+
+
+
+Client/Server |
+Server that doesn't recognize the header |
+Server that recognizes the header |
+
+
+Client that doesn't specify the header |
+Response is in parquet format |
+Response must be in parquet format. |
+
+
+Client that specifies the header |
+The header is ignored at the server, and the format of the response must always be parquet. |
+The header is processed properly by the server. If there's only one responseFormat specified, the server must respect and return in the requested format. If there's a list of responseFormat specified, such as `responseFormat=delta,parquet`. The server may choose to respond in parquet format if the table does not have any advanced features. The server must respond in delta format if the table has advanced features which are not compatible with the parquet format. |
+
+
+
+- If the client requests `delta` format and the response is in `parquet` format, the delta sharing
+client will NOT throw an error. Ideally, the caller of the client's method should handle such
+responses to be compatible with legacy servers.
+- If the client doesn't specify any header, or requests `parquet` format and the response is in
+`delta` format, the delta sharing client must throw an error.
+
+### responseFormat
+Indicates the format to expect in the [API Response Format in Parquet](#api-response-format-in-parquet), two values are supported.
+
+- parquet: Represents the format of the delta sharing protocol that has been used in `delta-sharing-spark` 1.0
+and less, also the default format if `responseFormat` is missing from the header. All the existing delta
+sharing connectors are able to process data in this format.
+- **delta**: format can be used to read a shared delta table with minReaderVersion > 1, which contains
+readerFeatures such as Deletion Vector or Column Mapping. `delta-sharing-spark` libraries
+that are able to process `responseformat=delta` will be released soon.
+
+### readerFeatures
+readerfeatures is only useful when `responseformat=delta`, it includes values from [delta reader
+features](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#table-features). It's set by the
+caller of `DeltaSharingClient` to indicate its ability to process delta readerFeatures.
+
+## API Response Format in Parquet
This section discusses the API Response Format returned by the server.
@@ -2299,7 +2413,7 @@ The JSON object in each line is a wrapper object that may contain the following
Field Name | Data Type | Description | Optional/Required
-|-|-|-
-protocol | The [Protocol](#protocol) JSON object. | Defines the versioning information about the API Response Format. | Optional
+protocol | The [Protocol](#protocol) JSON object. | Defines the versioning information about the API Response Format in Parquet. | Optional
metaData | The [Metadata](#metadata) JSON object. | The table metadata including schema, partitionColumns, etc. | Optional
file | The [File](#file) JSON object. | An individual data file in the table. | Optional
@@ -2335,7 +2449,7 @@ description | String | User-provided description for this table | Optional
format | [Format](#format) Object | Specification of the encoding for the files stored in the table. | Required
schemaString | String | Schema of the table. This is a serialized JSON string which can be deserialized to a [Schema](#schema-object) Object. | Required
partitionColumns | Array[String] | An array containing the names of columns by which the data should be partitioned. When a table doesn’t have partition columns, this will be an **empty** array. | Required
-configuration | Map[String, String] | A map containing configuration options for the table
+configuration | Map[String, String] | A map containing configuration options for the table | Optional
version | Long | The table version the metadata corresponds to, returned when querying table data with a version or timestamp parameter, or cdf query with includeHistoricalMetadata set to true. | Optional
size | Long | The size of the table in bytes, will be returned if available in the delta log. | Optional
numFiles | Long | The number of files in the table, will be returned if available in the delta log. | Optional
@@ -2762,6 +2876,119 @@ nullCount | The number of `null` values for this column
minValues | A value smaller than all values present in the file for this column
maxValues | A value larger than all values present in the file for this column
+## API Response Format in Delta
+
+This section discusses the API Response Format in Delta returned by the server. When a table is shared
+as delta format, the actions in the response could be put in a delta log in the local storage on the
+recipient side for the delta library to read data out of it directly. This way of sharing makes the
+delta sharing protocol more transparent and robust in supporting advanced delta feature, and minimizes code duplication.
+
+### JSON Wrapper Object In Each Line in Delta
+
+The JSON object in each line is a wrapper object that may contain the following fields. For each
+field, it is a wrapper of a [delta action](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#actions)(which keeps the action in its delta format and with original
+values), and with some additional fields for delta sharing functionalities.
+
+Field Name | Data Type | Description | Optional/Required
+-|-|-|-
+protocol | The [Protocol in Delta Format](#protocol-in-delta-format) JSON object. | A wrapper of delta protocol. | Optional
+metaData | The [Metadata in Delta Format](#metadata-in-delta-format) JSON object. | A wrapper of delta metadata, including some delta sharing specific fields. | Optional
+file | The [File in Delta Format](#file-in-delta-format) JSON object. | A wrapper of a delta single action in the table. | Optional
+
+It must contain only **ONE** of the above fields.
+
+### Protocol in Delta Format
+
+A wrapper of a [delta protocol](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#protocol-evolution).
+
+Field Name | Data Type | Description | Optional/Required
+-|-|-|-
+deltaProtocol | Delta Protocol | Need to be parsed by a delta library as a delta protocol. | Required
+
+Example (for illustration purposes; each JSON object must be a single line in the response):
+
+```json
+{
+ "protocol": {
+ "deltaProtocol": {
+ "minReaderVersion": 3,
+ "minWriterVersion": 7
+ }
+ }
+}
+```
+
+### Metadata in Delta Format
+
+A wrapper of a [delta Metadata](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#change-metadata).
+
+Field Name | Data Type | Description | Optional/Required
+-|-|-|-
+deltaMetadata | Delta Metadata | Need to be parsed by a delta library as delta metadata | Required
+version | Long | The table version the metadata corresponds to, returned when querying table data with a version or timestamp parameter, or cdf query with includeHistoricalMetadata set to true. | Optional
+size | Long | The size of the table in bytes, will be returned if available in the delta log. | Optional
+numFiles | Long | The number of files in the table, will be returned if available in the delta log. | Optional
+
+Example (for illustration purposes; each JSON object must be a single line in the response):
+
+```json
+{
+ "metaData": {
+ "version": 20,
+ "size": 123456,
+ "numFiles": 5,
+ "deltaMetadata": {
+ "partitionColumns": [
+ "date"
+ ],
+ "format": {
+ "provider": "parquet"
+ },
+ "schemaString": "{\"type\":\"struct\",\"fields\":[{\"name\":\"eventTime\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}",
+ "id": "f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
+ "configuration": {
+ "enableChangeDataFeed": "true"
+ }
+ }
+ }
+}
+```
+
+### File in Delta Format
+
+A wrapper of a delta file action, which can be [Add File and Remove File](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#add-file-and-remove-file),
+or [Add CDC File](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#add-cdc-file)
+
+Field Name | Data Type | Description | Optional/Required
+-|-|-|-
+id | String | A unique string for the file in a table. The same file is guaranteed to have the same id across multiple requests. A client may cache the file content and use this id as a key to decide whether to use the cached file content. | Required
+deletionVectorFileId | String | A unique string for the deletion vector file in a table. The same deletion vector file is guaranteed to have the same id across multiple requests. A client may cache the file content and use this id as a key to decide whether to use the cached file content. | Optional
+version | Long | The table version of the file, returned when querying a table data with a version or timestamp parameter. | Optional
+timestamp | Long | The unix timestamp corresponding to the table version of the file, in milliseconds, returned when querying a table data with a version or timestamp parameter. | Optional
+expirationTimestamp | Long | The unix timestamp corresponding to the expiration of the url, in milliseconds, returned when the server supports the feature. | Optional
+deltaSingleAction | Delta SingleAction | Need to be parsed by a delta library as a delta single action, the path field is replaced by pr-signed url. | Required
+
+Example (for illustration purposes; each JSON object must be a single line in the response):
+
+```json
+{
+ "file": {
+ "id": "591723a8-6a27-4240-a90e-57426f4736d2",
+ "size": 573,
+ "expirationTimestamp": 1652140800000,
+ "deltaSingleAction": {
+ "add": {
+ "path": "https://.s3.us-west-2.amazonaws.com/delta-exchange-test/table2/date%3D2021-04-28/part-00000-591723a8-6a27-4240-a90e-57426f4736d2.c000.snappy.parquet?...",
+ "partitionValues": {
+ "date": "2021-04-28"
+ },
+ "stats": "{\"numRecords\":1,\"minValues\":{\"eventTime\":\"2021-04-28T23:33:48.719Z\"},\"maxValues\":{\"eventTime\":\"2021-04-28T23:33:48.719Z\"},\"nullCount\":{\"eventTime\":0}}"
+ }
+ }
+ }
+}
+```
+
## SQL Expressions for Filtering
The client may send a sequence of predicates to the server as a hint to request fewer files when it only wishes to query a subset of the data (e.g., data where the `country` field is `US`). The server may try its best to filter files based on the predicates. This is **BEST EFFORT**, so the server may return files that don’t satisfy the predicates. For example, if the server fails to parse a SQL expression, the server can skip it. Hence, the client should always apply predicates to filter the data returned by the server.
@@ -2824,7 +3051,9 @@ ValueType | Description
"long" | Represents a Long type.
"string" | Represents a String type.
"date" | Represents a Date type in "yyyy-mm-dd" format.
-
+"float" | Represents a Float type.
+"double" | Represents a Double type.
+"timestamp" | Represents a timestamp in [Timestamp Format](#timestamp-format).
Examples
@@ -2880,6 +3109,25 @@ Examples
}
```
+## Delta Sharing Streaming Specs
+Delta Sharing Streaming is supported starting from delta-sharing-spark 0.6.0. As it's implemented
+based on spark structured streaming, it leverages a pull model to consume the new data of the shared
+table from the delta sharing server. In addition to most options supported in delta streaming,
+there are two options/spark configs for delta sharing streaming.
+
+- spark config **spark.delta.sharing.streaming.queryTableVersionIntervalSeconds**: DeltaSharingSource
+leverages [getTableVersion](#query-table-version) rpc to check whether there is new data available
+to consume. In order to reduce the traffic burden to the delta sharing server, there's a minimum 30
+seconds interval between two getTableVersion rpcs to the delta sharing server. Though, if you are ok
+with less freshness on the data and want to reduce the traffic to the server, you can set this
+config to a larger number, for example: 60s or 120s. An error will be thrown if it's set less than 30 seconds.
+- option **maxVersionsPerRpc**: DeltaSharingSource leverages [QueryTable](#read-data-from-a-table)
+rpc to continuously read new data from the delta sharing server. There might be too much
+new data to be returned from the server if the streaming has paused for a while on the recipient
+side. Its default value is 100, a smaller number is recommended such as `.option("maxVersionsPerRpc", 10)`
+to reduce the traffic load for each rpc. This shouldn't affect the freshness of the data significantly
+assuming the process time of the delta sharing server grows linearly with `maxVersionsPerRpc`.
+
# Profile File Format
diff --git a/protocol/delta-sharing-protocol-api.yml b/protocol/delta-sharing-protocol-api.yml
index 8cd1e94e8..2a57040c6 100644
--- a/protocol/delta-sharing-protocol-api.yml
+++ b/protocol/delta-sharing-protocol-api.yml
@@ -521,6 +521,12 @@ paths:
description: 'If set to true, return the historical metadata if seen in the delta log. This is for the streaming client to check if the table schema is still read compatible.'
schema:
type: boolean
+ - in: header
+ name: delta-sharing-capabilities
+ required: false
+ description: 'Delta Sharing Capabilities'
+ schema:
+ type: string
responses:
'400':
$ref: "#/components/responses/400"
@@ -575,11 +581,14 @@ components:
not Unary Represents a logical not check. This op should have once child.
The supported value types:
ValueType Description
- "bool" Represents an Boolean type.
- "int" Represents an Integer type.
- "long" Represents a Long type.
- "string" Represents a String type.
- "date" Represents a Date type in "yyyy-mm-dd" format.
+ "bool" Represents an Boolean type.
+ "int" Represents an Integer type.
+ "long" Represents a Long type.
+ "string" Represents a String type.
+ "date" Represents a Date type in "yyyy-mm-dd" format.
+ "float" Represents a Float type.
+ "double" Represents a Double type.
+ "timestamp" Represents a timestamp in ISO8601 format, in the UTC timezone.
ListShareResponse:
type: object
@@ -758,124 +767,6 @@ components:
type: string
message:
type: string
-
- # This is not used for the spec but comes handy for autogeneration
- TableMetadataResponseObject:
- type: object
- properties:
- protocol:
- # it refers to ./delta-sharing-protocol.md#protocol
- $ref: '#/components/schemas/ProtocolObject'
- metadata:
- # it refers to ./delta-sharing-protocol.md#metadata
- $ref: '#/components/schemas/MetadataObject'
-
- # This is not used for the spec but comes handy for autogeneration
- TableQueryResponseObject:
- type: object
- properties:
- protocol:
- # it refers to ./delta-sharing-protocol.md#protocol
- $ref: '#/components/schemas/ProtocolObject'
- metadata:
- # it refers to ./delta-sharing-protocol.md#metadata
- $ref: '#/components/schemas/MetadataObject'
- files:
- type: array
- items:
- # it refers to ./delta-sharing-protocol.md#file
- $ref: '#/components/schemas/FileObject'
- FileObject:
- type: object
- properties:
- file:
- type: object
- properties:
- url:
- type: string
- id:
- type: string
- partitionValues:
- type: object
- additionalProperties:
- type:
- string
- size:
- type: integer
- format: int64
- stats:
- type: string
- version:
- type: integer
- format: int64
- timestamp:
- type: integer
- format: int64
- expirationTimestamp:
- type: integer
- format: int64
- required:
- - url
- - id
- - partitionValues
- - size
- ProtocolObject:
- type: object
- properties:
- protocol:
- type: object
- properties:
- minReaderVersion:
- type: integer
- format: int32
- FormatObject:
- type: object
- properties:
- provider:
- type: string
- required:
- - provider
-
- MetadataObject:
- type: object
- properties:
- metaData:
- type: object
- properties:
- id:
- type: string
- name:
- type: string
- description:
- type: string
- format:
- $ref: '#/components/schemas/FormatObject'
- schemaString:
- type: string
- partitionColumns:
- type: array
- items:
- type: string
- configuration:
- type: object
- additionalProperties:
- type:
- string
- version:
- type: integer
- format: int64
- size:
- type: integer
- format: int64
- numFiles:
- type: integer
- format: int64
- required:
- - id
- - format
- - schemaString
- - partitionColumns
-
responses:
"400":
description: The request is malformed
diff --git a/server/app/src/main/java/io/whitefox/api/deltasharing/DeltaMappers.java b/server/app/src/main/java/io/whitefox/api/deltasharing/DeltaMappers.java
index 47503f78e..406c65ea4 100644
--- a/server/app/src/main/java/io/whitefox/api/deltasharing/DeltaMappers.java
+++ b/server/app/src/main/java/io/whitefox/api/deltasharing/DeltaMappers.java
@@ -1,7 +1,13 @@
package io.whitefox.api.deltasharing;
+import io.whitefox.api.deltasharing.model.v1.TableMetadataResponse;
+import io.whitefox.api.deltasharing.model.v1.TableQueryResponse;
import io.whitefox.api.deltasharing.model.v1.generated.*;
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetFile;
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetMetadata;
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetProtocol;
import io.whitefox.api.server.CommonMappers;
+import io.whitefox.api.server.WhitefoxMappers;
import io.whitefox.core.*;
import io.whitefox.core.Schema;
import io.whitefox.core.Share;
@@ -53,46 +59,46 @@ public static ReadTableRequest api2ReadTableRequest(QueryRequest request) {
}
}
- public static TableQueryResponseObject readTableResult2api(ReadTableResult readTableResult) {
- return new TableQueryResponseObject()
- .metadata(metadata2Api(readTableResult.metadata()))
- .protocol(protocol2Api(readTableResult.protocol()))
- .files(readTableResult.files().stream()
- .map(DeltaMappers::file2Api)
- .collect(Collectors.toList()));
+ public static TableQueryResponse readTableResult2api(ReadTableResult readTableResult) {
+ return new TableQueryResponse(
+ protocol2Api(readTableResult.protocol()),
+ metadata2Api(readTableResult.metadata()),
+ readTableResult.files().stream().map(DeltaMappers::file2Api).collect(Collectors.toList()));
}
- private static MetadataObject metadata2Api(Metadata metadata) {
- return new MetadataObject()
- .metaData(new MetadataObjectMetaData()
+ private static ParquetMetadata metadata2Api(Metadata metadata) {
+ return ParquetMetadata.builder()
+ .metadata(ParquetMetadata.Metadata.builder()
.id(metadata.id())
- .name(metadata.name().orElse(null))
- .description(metadata.description().orElse(null))
- .format(new FormatObject().provider(metadata.format().provider()))
+ .name(metadata.name())
+ .description(metadata.description())
+ .format(WhitefoxMappers.format2api(metadata.format()))
.schemaString(metadata.tableSchema().structType().toJson())
.partitionColumns(metadata.partitionColumns())
- ._configuration(metadata.configuration())
- .version(metadata.version().orElse(null))
- .numFiles(metadata.numFiles().orElse(null)));
+ .configuration(Optional.of(metadata.configuration()))
+ .version(metadata.version())
+ .numFiles(metadata.numFiles())
+ .build())
+ .build();
}
- private static ProtocolObject protocol2Api(Protocol protocol) {
- return new ProtocolObject()
- .protocol(new ProtocolObjectProtocol()
- .minReaderVersion(protocol.minReaderVersion().orElse(1)));
+ private static ParquetProtocol protocol2Api(Protocol protocol) {
+ return ParquetProtocol.ofMinReaderVersion(protocol.minReaderVersion().orElse(1));
}
- private static FileObject file2Api(TableFile f) {
- return new FileObject()
- ._file(new FileObjectFile()
+ private static ParquetFile file2Api(TableFile f) {
+ return ParquetFile.builder()
+ .file(ParquetFile.File.builder()
.id(f.id())
.url(f.url())
.partitionValues(f.partitionValues())
.size(f.size())
- .stats(f.stats().orElse(null))
- .version(f.version().orElse(null))
- .timestamp(f.timestamp().orElse(null))
- .expirationTimestamp(f.expirationTimestamp()));
+ .stats(f.stats())
+ .version(f.version())
+ .timestamp(f.timestamp())
+ .expirationTimestamp(Optional.of(f.expirationTimestamp()))
+ .build())
+ .build();
}
public static TableReferenceAndReadRequest api2TableReferenceAndReadRequest(
@@ -127,9 +133,9 @@ public static Map toHeaderCapabilitiesMap(String headerCapabilit
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}
- public static TableMetadataResponseObject toTableResponseMetadata(Metadata m) {
- return new TableMetadataResponseObject()
- .protocol(new ProtocolObject().protocol(new ProtocolObjectProtocol().minReaderVersion(1)))
- .metadata(metadata2Api(m));
+ public static TableMetadataResponse toTableResponseMetadata(Metadata m) {
+ return new TableMetadataResponse(
+ ParquetProtocol.ofMinReaderVersion(1), // smell
+ metadata2Api(m));
}
}
diff --git a/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/Format.java b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/Format.java
new file mode 100644
index 000000000..04ba2d488
--- /dev/null
+++ b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/Format.java
@@ -0,0 +1,26 @@
+package io.whitefox.api.deltasharing.model.v1;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.EqualsAndHashCode;
+
+@EqualsAndHashCode
+public class Format {
+ private static final String PARQUET = "parquet";
+
+ @JsonProperty
+ public String provider() {
+ return PARQUET;
+ }
+
+ public Format() {
+ this(PARQUET);
+ }
+
+ @JsonCreator
+ private Format(@JsonProperty("provider") String provider) {
+ if (!"parquet".equalsIgnoreCase(provider)) {
+ throw new IllegalArgumentException("Provider must be " + PARQUET);
+ }
+ }
+}
diff --git a/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/TableMetadataResponse.java b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/TableMetadataResponse.java
new file mode 100644
index 000000000..7b99bb625
--- /dev/null
+++ b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/TableMetadataResponse.java
@@ -0,0 +1,13 @@
+package io.whitefox.api.deltasharing.model.v1;
+
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetMetadata;
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetProtocol;
+import lombok.NonNull;
+import lombok.Value;
+
+@Value
+public class TableMetadataResponse {
+ @NonNull ParquetProtocol protocol;
+
+ @NonNull ParquetMetadata metadata;
+}
diff --git a/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/TableQueryResponse.java b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/TableQueryResponse.java
new file mode 100644
index 000000000..52f7311af
--- /dev/null
+++ b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/TableQueryResponse.java
@@ -0,0 +1,17 @@
+package io.whitefox.api.deltasharing.model.v1;
+
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetFile;
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetMetadata;
+import io.whitefox.api.deltasharing.model.v1.parquet.ParquetProtocol;
+import java.util.List;
+import lombok.NonNull;
+import lombok.Value;
+
+@Value
+public class TableQueryResponse {
+ @NonNull ParquetProtocol protocol;
+
+ @NonNull ParquetMetadata metadata;
+
+ @NonNull List files;
+}
diff --git a/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/delta/DeltaFile.java b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/delta/DeltaFile.java
new file mode 100644
index 000000000..b0426f406
--- /dev/null
+++ b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/delta/DeltaFile.java
@@ -0,0 +1,73 @@
+package io.whitefox.api.deltasharing.model.v1.delta;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.Optional;
+import lombok.Builder;
+import lombok.NonNull;
+import lombok.Value;
+import lombok.experimental.SuperBuilder;
+import lombok.extern.jackson.Jacksonized;
+
+@Value
+@SuperBuilder
+@Jacksonized
+public class DeltaFile {
+
+ @JsonProperty
+ @NonNull File file;
+
+ @Value
+ @SuperBuilder
+ @Jacksonized
+ @JsonInclude(JsonInclude.Include.NON_ABSENT)
+ public static class File {
+
+ /**
+ * A unique string for the file in a table.
+ * The same file is guaranteed to have the same id across multiple requests.
+ * A client may cache the file content and use this id as a key to decide whether to use the cached file content.
+ */
+ @JsonProperty
+ @NonNull String id;
+
+ /**
+ * A unique string for the deletion vector file in a table.
+ * The same deletion vector file is guaranteed to have the same id across multiple requests.
+ * A client may cache the file content and use this id as a key to decide whether to use the cached file content.
+ */
+ @JsonProperty
+ @Builder.Default
+ Optional deletionVectorFileId = Optional.empty();
+
+ /**
+ * The table version of the file, returned when querying a table data with a version or timestamp parameter.
+ */
+ @JsonProperty
+ @Builder.Default
+ Optional version = Optional.empty();
+
+ /**
+ * The unix timestamp corresponding to the table version of the file, in milliseconds,
+ * returned when querying a table data with a version or timestamp parameter.
+ */
+ @JsonProperty
+ @Builder.Default
+ Optional timestamp = Optional.empty();
+
+ /**
+ * The unix timestamp corresponding to the expiration of the url, in milliseconds,
+ * returned when the server supports the feature.
+ */
+ @JsonProperty
+ @Builder.Default
+ Optional expirationTimestamp = Optional.empty();
+
+ /**
+ * Need to be parsed by a delta library as a delta single action, the path field is replaced by pr-signed url.
+ */
+ @JsonProperty
+ @NonNull JsonNode deltaSingleAction;
+ }
+}
diff --git a/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/delta/DeltaInternalFormat.java b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/delta/DeltaInternalFormat.java
new file mode 100644
index 000000000..00cb0287c
--- /dev/null
+++ b/server/app/src/main/java/io/whitefox/api/deltasharing/model/v1/delta/DeltaInternalFormat.java
@@ -0,0 +1,41 @@
+package io.whitefox.api.deltasharing.model.v1.delta;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.Map;
+import java.util.Optional;
+import lombok.EqualsAndHashCode;
+
+@EqualsAndHashCode
+@JsonInclude(JsonInclude.Include.NON_ABSENT)
+public class DeltaInternalFormat {
+ private static final String PARQUET = "parquet";
+
+ private final Optional