-
Notifications
You must be signed in to change notification settings - Fork 23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Impl rest catalog + table updates & requirements #146
base: main
Are you sure you want to change the base?
Changes from all commits
f9febaf
3b3a10f
3416129
56c8458
4c49f96
bded0e6
ca9796b
1f9edd7
c48301c
c48e9f7
adcef8f
ca88a69
bdef610
fa6c5b5
795a6c9
a952f2b
48b3f2d
5b70ef0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,6 +84,86 @@ func (e errorResponse) Error() string { | |
return e.Type + ": " + e.Message | ||
} | ||
|
||
type Identifier struct { | ||
Namespace []string `json:"namespace"` | ||
Name string `json:"name"` | ||
} | ||
Comment on lines
+87
to
+90
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we're going to export this type, we should probably name it |
||
|
||
type commitTableResponse struct { | ||
MetadataLoc string `json:"metadata-location"` | ||
RawMetadata json.RawMessage `json:"metadata"` | ||
Metadata table.Metadata `json:"-"` | ||
} | ||
|
||
func (t *commitTableResponse) UnmarshalJSON(b []byte) (err error) { | ||
type Alias commitTableResponse | ||
if err = json.Unmarshal(b, (*Alias)(t)); err != nil { | ||
return err | ||
} | ||
|
||
t.Metadata, err = table.ParseMetadataBytes(t.RawMetadata) | ||
return | ||
} | ||
|
||
type loadTableResponse struct { | ||
MetadataLoc string `json:"metadata-location"` | ||
RawMetadata json.RawMessage `json:"metadata"` | ||
Config iceberg.Properties `json:"config"` | ||
Metadata table.Metadata `json:"-"` | ||
} | ||
|
||
func (t *loadTableResponse) UnmarshalJSON(b []byte) (err error) { | ||
type Alias loadTableResponse | ||
if err = json.Unmarshal(b, (*Alias)(t)); err != nil { | ||
return err | ||
} | ||
|
||
t.Metadata, err = table.ParseMetadataBytes(t.RawMetadata) | ||
return | ||
} | ||
|
||
type createTableOption func(*createTableRequest) | ||
|
||
func WithLocation(loc string) createTableOption { | ||
return func(req *createTableRequest) { | ||
req.Location = strings.TrimRight(loc, "/") | ||
} | ||
} | ||
|
||
func WithPartitionSpec(spec iceberg.PartitionSpec) createTableOption { | ||
return func(req *createTableRequest) { | ||
req.PartitionSpec = spec | ||
} | ||
} | ||
|
||
func WithWriteOrder(order table.SortOrder) createTableOption { | ||
return func(req *createTableRequest) { | ||
req.WriteOrder = order | ||
} | ||
} | ||
|
||
func WithStageCreate() createTableOption { | ||
return func(req *createTableRequest) { | ||
req.StageCreate = true | ||
} | ||
} | ||
|
||
func WithProperties(props iceberg.Properties) createTableOption { | ||
return func(req *createTableRequest) { | ||
req.Props = props | ||
} | ||
} | ||
|
||
type createTableRequest struct { | ||
Name string `json:"name"` | ||
Location string `json:"location"` | ||
Schema *iceberg.Schema `json:"schema"` | ||
PartitionSpec iceberg.PartitionSpec `json:"partition-spec"` | ||
WriteOrder table.SortOrder `json:"write-order"` | ||
StageCreate bool `json:"stage-create"` | ||
Props iceberg.Properties `json:"properties"` | ||
} | ||
|
||
type oauthTokenResponse struct { | ||
AccessToken string `json:"access_token"` | ||
TokenType string `json:"token_type"` | ||
|
@@ -537,6 +617,20 @@ func checkValidNamespace(ident table.Identifier) error { | |
return nil | ||
} | ||
|
||
func (r *RestCatalog) tableFromResponse(identifier []string, metadata table.Metadata, loc string, config iceberg.Properties) (*table.Table, error) { | ||
id := identifier | ||
if r.name != "" { | ||
id = append([]string{r.name}, identifier...) | ||
} | ||
|
||
iofs, err := iceio.LoadFS(config, loc) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return table.New(id, metadata, loc, iofs), nil | ||
} | ||
|
||
func (r *RestCatalog) ListTables(ctx context.Context, namespace table.Identifier) ([]table.Identifier, error) { | ||
if err := checkValidNamespace(namespace); err != nil { | ||
return nil, err | ||
|
@@ -546,12 +640,8 @@ func (r *RestCatalog) ListTables(ctx context.Context, namespace table.Identifier | |
path := []string{"namespaces", ns, "tables"} | ||
|
||
type resp struct { | ||
Identifiers []struct { | ||
Namespace []string `json:"namespace"` | ||
Name string `json:"name"` | ||
} `json:"identifiers"` | ||
Identifiers []Identifier `json:"identifiers"` | ||
Comment on lines
-549
to
+643
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is the identifier type used anywhere other than here? The reason I had done it inline here was because it was only used in this one spot and i didn't want it to get confused with |
||
} | ||
|
||
rsp, err := doGet[resp](ctx, r.baseURI, path, r.cl, map[int]error{http.StatusNotFound: ErrNoSuchNamespace}) | ||
if err != nil { | ||
return nil, err | ||
|
@@ -573,64 +663,151 @@ func splitIdentForPath(ident table.Identifier) (string, string, error) { | |
return strings.Join(NamespaceFromIdent(ident), namespaceSeparator), TableNameFromIdent(ident), nil | ||
} | ||
|
||
type tblResponse struct { | ||
MetadataLoc string `json:"metadata-location"` | ||
RawMetadata json.RawMessage `json:"metadata"` | ||
Config iceberg.Properties `json:"config"` | ||
Metadata table.Metadata `json:"-"` | ||
} | ||
func (r *RestCatalog) CreateTable(ctx context.Context, identifier table.Identifier, schema *iceberg.Schema, opts ...createTableOption) (*table.Table, error) { | ||
ns, tbl, err := splitIdentForPath(identifier) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
func (t *tblResponse) UnmarshalJSON(b []byte) (err error) { | ||
type Alias tblResponse | ||
if err = json.Unmarshal(b, (*Alias)(t)); err != nil { | ||
return err | ||
payload := createTableRequest{ | ||
Name: tbl, | ||
Schema: schema, | ||
} | ||
for _, o := range opts { | ||
o(&payload) | ||
} | ||
|
||
t.Metadata, err = table.ParseMetadataBytes(t.RawMetadata) | ||
return | ||
ret, err := doPost[createTableRequest, loadTableResponse](ctx, r.baseURI, []string{"namespaces", ns, "tables"}, payload, | ||
r.cl, map[int]error{http.StatusNotFound: ErrNoSuchNamespace, http.StatusConflict: ErrTableAlreadyExists}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
config := maps.Clone(r.props) | ||
maps.Copy(config, ret.Metadata.Properties()) | ||
for k, v := range ret.Config { | ||
config[k] = v | ||
} | ||
Comment on lines
+688
to
+690
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why loop instead of just doing |
||
|
||
return r.tableFromResponse(identifier, ret.Metadata, ret.MetadataLoc, config) | ||
} | ||
|
||
func (r *RestCatalog) LoadTable(ctx context.Context, identifier table.Identifier, props iceberg.Properties) (*table.Table, error) { | ||
func (r *RestCatalog) RegisterTable(ctx context.Context, identifier table.Identifier, metadataLoc string) (*table.Table, error) { | ||
ns, tbl, err := splitIdentForPath(identifier) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
if props == nil { | ||
props = iceberg.Properties{} | ||
type payload struct { | ||
Name string `json:"name"` | ||
MetadataLoc string `json:"metadata-location"` | ||
} | ||
|
||
ret, err := doGet[tblResponse](ctx, r.baseURI, []string{"namespaces", ns, "tables", tbl}, | ||
r.cl, map[int]error{http.StatusNotFound: ErrNoSuchTable}) | ||
ret, err := doPost[payload, loadTableResponse](ctx, r.baseURI, []string{"namespaces", ns, "tables", tbl}, | ||
payload{Name: tbl, MetadataLoc: metadataLoc}, r.cl, map[int]error{http.StatusNotFound: ErrNoSuchNamespace, http.StatusConflict: ErrTableAlreadyExists}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
id := identifier | ||
if r.name != "" { | ||
id = append([]string{r.name}, identifier...) | ||
config := maps.Clone(r.props) | ||
maps.Copy(config, ret.Metadata.Properties()) | ||
for k, v := range ret.Config { | ||
config[k] = v | ||
} | ||
Comment on lines
+714
to
716
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same question, why not |
||
|
||
tblProps := maps.Clone(r.props) | ||
maps.Copy(tblProps, props) | ||
maps.Copy(tblProps, ret.Metadata.Properties()) | ||
return r.tableFromResponse(identifier, ret.Metadata, ret.MetadataLoc, config) | ||
} | ||
|
||
func (r *RestCatalog) LoadTable(ctx context.Context, identifier table.Identifier, props iceberg.Properties) (*table.Table, error) { | ||
ns, tbl, err := splitIdentForPath(identifier) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
ret, err := doGet[loadTableResponse](ctx, r.baseURI, []string{"namespaces", ns, "tables", tbl}, | ||
r.cl, map[int]error{http.StatusNotFound: ErrNoSuchTable}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
config := maps.Clone(r.props) | ||
maps.Copy(config, props) | ||
maps.Copy(config, ret.Metadata.Properties()) | ||
for k, v := range ret.Config { | ||
tblProps[k] = v | ||
config[k] = v | ||
} | ||
|
||
return r.tableFromResponse(identifier, ret.Metadata, ret.MetadataLoc, config) | ||
} | ||
|
||
func (r *RestCatalog) UpdateTable(ctx context.Context, identifier table.Identifier, requirements []table.Requirement, updates []table.Update) (*table.Table, error) { | ||
ns, tbl, err := splitIdentForPath(identifier) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
iofs, err := iceio.LoadFS(tblProps, ret.MetadataLoc) | ||
ident := Identifier{ | ||
Namespace: NamespaceFromIdent(identifier), | ||
Name: tbl, | ||
} | ||
type payload struct { | ||
Identifier Identifier `json:"identifier"` | ||
Requirements []table.Requirement `json:"requirements"` | ||
Updates []table.Update `json:"updates"` | ||
} | ||
ret, err := doPost[payload, commitTableResponse](ctx, r.baseURI, []string{"namespaces", ns, "tables", tbl}, | ||
payload{Identifier: ident, Requirements: requirements, Updates: updates}, r.cl, | ||
map[int]error{http.StatusNotFound: ErrNoSuchTable, http.StatusConflict: ErrCommitFailed}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return table.New(id, ret.Metadata, ret.MetadataLoc, iofs), nil | ||
|
||
config := maps.Clone(r.props) | ||
maps.Copy(config, ret.Metadata.Properties()) | ||
|
||
return r.tableFromResponse(identifier, ret.Metadata, ret.MetadataLoc, config) | ||
} | ||
|
||
func (r *RestCatalog) DropTable(ctx context.Context, identifier table.Identifier) error { | ||
return fmt.Errorf("%w: [Rest Catalog] drop table", iceberg.ErrNotImplemented) | ||
func (r *RestCatalog) DropTable(ctx context.Context, identifier table.Identifier, purge bool) error { | ||
ns, tbl, err := splitIdentForPath(identifier) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
uri := r.baseURI.JoinPath("namespaces", ns, "tables", tbl) | ||
if purge { | ||
v := url.Values{} | ||
v.Set("purgeRequested", "true") | ||
uri.RawQuery = v.Encode() | ||
} | ||
|
||
_, err = doDelete[struct{}](ctx, uri, []string{}, r.cl, | ||
map[int]error{http.StatusNotFound: ErrNoSuchTable}) | ||
|
||
return err | ||
} | ||
|
||
func (r *RestCatalog) RenameTable(ctx context.Context, from, to table.Identifier) (*table.Table, error) { | ||
return nil, fmt.Errorf("%w: [Rest Catalog] rename table", iceberg.ErrNotImplemented) | ||
type payload struct { | ||
From Identifier `json:"from"` | ||
To Identifier `json:"to"` | ||
} | ||
f := Identifier{ | ||
Namespace: NamespaceFromIdent(from), | ||
Name: TableNameFromIdent(from), | ||
} | ||
t := Identifier{ | ||
Namespace: NamespaceFromIdent(to), | ||
Name: TableNameFromIdent(to), | ||
} | ||
|
||
_, err := doPost[payload, any](ctx, r.baseURI, []string{"tables", "rename"}, payload{From: f, To: t}, r.cl, | ||
map[int]error{http.StatusNotFound: ErrNoSuchTable}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return r.LoadTable(ctx, to, nil) | ||
} | ||
|
||
func (r *RestCatalog) CreateNamespace(ctx context.Context, namespace table.Identifier, props iceberg.Properties) error { | ||
|
@@ -710,3 +887,20 @@ func (r *RestCatalog) UpdateNamespaceProperties(ctx context.Context, namespace t | |
return doPost[payload, PropertiesUpdateSummary](ctx, r.baseURI, []string{"namespaces", ns, "properties"}, | ||
payload{Remove: removals, Updates: updates}, r.cl, map[int]error{http.StatusNotFound: ErrNoSuchNamespace}) | ||
} | ||
|
||
func (r *RestCatalog) CheckNamespaceExists(ctx context.Context, namespace table.Identifier) (bool, error) { | ||
if err := checkValidNamespace(namespace); err != nil { | ||
return false, err | ||
} | ||
|
||
_, err := doGet[struct{}](ctx, r.baseURI, []string{"namespaces", strings.Join(namespace, namespaceSeparator)}, | ||
r.cl, map[int]error{http.StatusNotFound: ErrNoSuchNamespace}) | ||
if err != nil { | ||
if errors.Is(err, ErrNoSuchNamespace) { | ||
return false, nil | ||
} | ||
return false, err | ||
} | ||
|
||
return true, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should add explanation of what the purge argument does. is that something that was recently added to the REST spec?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
delete:
tags:
- Catalog API
summary: Drop a table from the catalog
operationId: dropTable
description: Remove a table from the catalog
parameters:
- name: purgeRequested
in: query
required: false
description: Whether the user requested to purge the underlying table's data and metadata
schema:
type: boolean
default: false
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took this field directly from the OpenAPI specification:
https://github.com/apache/iceberg/blob/34cd01ba2e057866cdb13db8f9919bc98e11e638/open-api/rest-catalog-open-api.yaml#L1096