Skip to content

Commit 00b2886

Browse files
geruhbitsondatadev
authored and committed
REST spec: Add ContentFile types to spec for the PreplanTable and PlanTable API (apache#9717)
1 parent 2438a3e commit 00b2886

File tree

2 files changed

+495
-0
lines changed

2 files changed

+495
-0
lines changed

open-api/rest-catalog-open-api.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
from __future__ import annotations
1919

20+
from datetime import date
2021
from typing import Any, Dict, List, Literal, Optional, Union
22+
from uuid import UUID
2123

2224
from pydantic import BaseModel, Extra, Field
2325

@@ -629,6 +631,183 @@ class PartitionStatisticsFile(BaseModel):
629631
file_size_in_bytes: int = Field(..., alias='file-size-in-bytes')
630632

631633

634+
# Root model wrapping a single boolean primitive value.
class BooleanTypeValue(BaseModel):
    __root__: bool = Field(default=..., example=True)
636+
637+
638+
# Root model wrapping a single integer primitive value.
class IntegerTypeValue(BaseModel):
    __root__: int = Field(default=..., example=42)
640+
641+
642+
# Root model wrapping a single long primitive value (a Python int).
class LongTypeValue(BaseModel):
    __root__: int = Field(default=..., example=9223372036854775807)
644+
645+
646+
# Root model wrapping a single float primitive value.
class FloatTypeValue(BaseModel):
    __root__: float = Field(default=..., example=3.14)
648+
649+
650+
# Root model wrapping a single double primitive value (a Python float).
class DoubleTypeValue(BaseModel):
    __root__: float = Field(default=..., example=123.456)
652+
653+
654+
# Root model for a decimal primitive value, carried as a string.
class DecimalTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='123.4500',
        description="Decimal type values are serialized as strings. Decimals with a positive scale serialize as numeric plain text, while decimals with a negative scale use scientific notation and the exponent will be equal to the negated scale. For instance, a decimal with a positive scale is '123.4500', with zero scale is '2', and with a negative scale is '2E+20'",
    )
660+
661+
662+
# Root model wrapping a single string primitive value.
class StringTypeValue(BaseModel):
    __root__: str = Field(default=..., example='hello')
664+
665+
666+
# Root model for a UUID primitive value; the payload is typed as ``uuid.UUID``.
#
# pydantic v1 can only enforce ``max_length`` / ``min_length`` / ``regex`` on
# str-like fields. Setting them on a ``UUID`` field makes class creation fail
# with "field constraints are set but not enforced". The limits are therefore
# passed under their raw JSON-Schema names (``maxLength`` / ``minLength`` /
# ``pattern``): pydantic copies such extra keyword arguments into the generated
# schema without attempting to enforce them, so the documented format is kept
# while the model stays importable.
class UUIDTypeValue(BaseModel):
    __root__: UUID = Field(
        ...,
        description='UUID type values are serialized as a 36-character lowercase string in standard UUID format as specified by RFC-4122',
        example='eb26bdb1-a1d8-4aa6-990e-da940875492c',
        maxLength=36,
        minLength=36,
        pattern='^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$',
    )
675+
676+
677+
# Root model for a date primitive value; the payload is typed as ``datetime.date``.
class DateTypeValue(BaseModel):
    __root__: date = Field(
        default=...,
        example='2007-12-03',
        description="Date type values follow the 'YYYY-MM-DD' ISO-8601 standard date format",
    )
683+
684+
685+
# Root model for a time-of-day primitive value, carried as a string.
class TimeTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='22:31:08.123456',
        description="Time type values follow the 'HH:MM:SS.ssssss' ISO-8601 format with microsecond precision",
    )
691+
692+
693+
# Root model for a timestamp (no zone offset) primitive value, carried as a string.
class TimestampTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='2007-12-03T10:15:30.123456',
        description="Timestamp type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss' ISO-8601 format with microsecond precision",
    )
699+
700+
701+
# Root model for a timestamp-with-offset primitive value, carried as a string.
class TimestampTzTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='2007-12-03T10:15:30.123456+00:00',
        description="TimestampTz type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss+00:00' ISO-8601 format with microsecond precision, and a timezone offset (+00:00 for UTC)",
    )
707+
708+
709+
# Root model for a nanosecond-precision timestamp (no zone offset), carried as a string.
class TimestampNanoTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='2007-12-03T10:15:30.123456789',
        description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss' ISO-8601 format with nanosecond precision",
    )
715+
716+
717+
# Root model for a nanosecond-precision timestamp with a zone offset, carried
# as a string. The description names the type "Timestamptz_ns" so it is not
# confused with the offset-free Timestamp_ns variant, whose wording it had
# previously duplicated.
class TimestampTzNanoTypeValue(BaseModel):
    __root__: str = Field(
        ...,
        description="Timestamptz_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss+00:00' ISO-8601 format with nanosecond precision, and a timezone offset (+00:00 for UTC)",
        example='2007-12-03T10:15:30.123456789+00:00',
    )
723+
724+
725+
# Root model for a fixed-length binary primitive value, carried as a hex string.
class FixedTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='78797A',
        description='Fixed length type values are stored and serialized as an uppercase hexadecimal string preserving the fixed length',
    )
731+
732+
733+
# Root model for a variable-length binary primitive value, carried as a hex string.
class BinaryTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        example='78797A',
        description='Binary type values are stored and serialized as an uppercase hexadecimal string',
    )
739+
740+
741+
# Map encoded as two parallel optional lists: integer column ids in ``keys``
# and long counts in ``values``, paired by position.
class CountMap(BaseModel):
    keys: Optional[List[IntegerTypeValue]] = Field(
        default=None,
        description='List of integer column ids for each corresponding value',
    )
    values: Optional[List[LongTypeValue]] = Field(
        default=None,
        description="List of Long values, matched to 'keys' by index",
    )
748+
749+
750+
# Root model accepting any one of the primitive value wrappers.
# NOTE(review): member order is kept exactly as declared; pydantic v1 is
# understood to try Union members left to right, so reordering could change
# which wrapper a raw value validates as -- confirm before touching the order.
class PrimitiveTypeValue(BaseModel):
    __root__: Union[
        BooleanTypeValue, IntegerTypeValue, LongTypeValue,
        FloatTypeValue, DoubleTypeValue, DecimalTypeValue,
        StringTypeValue, UUIDTypeValue, DateTypeValue, TimeTypeValue,
        TimestampTypeValue, TimestampTzTypeValue,
        TimestampNanoTypeValue, TimestampTzNanoTypeValue,
        FixedTypeValue, BinaryTypeValue,
    ]
769+
770+
771+
# Root model restricting a file format to one of three literal names.
class FileFormat(BaseModel):
    __root__: Literal[
        'avro',
        'orc',
        'parquet',
    ]
773+
774+
775+
# Base model with the fields shared by the data-file and delete-file models.
# ``content`` is a plain string here; each subclass narrows it to a Literal.
# Hyphenated wire names are mapped to attributes through ``alias``.
class ContentFile(BaseModel):
    content: str
    file_path: str = Field(default=..., alias='file-path')
    file_format: FileFormat = Field(default=..., alias='file-format')
    spec_id: int = Field(default=..., alias='spec-id')
    partition: Optional[List[PrimitiveTypeValue]] = Field(
        default=None,
        example=[1, 'bar'],
        description='A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`',
    )
    file_size_in_bytes: int = Field(
        default=...,
        description='Total file size in bytes',
        alias='file-size-in-bytes',
    )
    record_count: int = Field(
        default=...,
        description='Number of records in the file',
        alias='record-count',
    )
    key_metadata: Optional[BinaryTypeValue] = Field(
        default=None,
        description='Encryption key metadata blob',
        alias='key-metadata',
    )
    split_offsets: Optional[List[int]] = Field(
        default=None,
        description='List of splittable offsets',
        alias='split-offsets',
    )
    sort_order_id: Optional[int] = Field(default=None, alias='sort-order-id')
798+
799+
800+
# ContentFile variant whose ``content`` must be the literal 'position-deletes'.
class PositionDeleteFile(ContentFile):
    content: Literal[
        'position-deletes'
    ]
802+
803+
804+
# ContentFile variant whose ``content`` must be the literal 'equality-deletes';
# it adds an optional list of equality field ids.
class EqualityDeleteFile(ContentFile):
    content: Literal['equality-deletes']
    equality_ids: Optional[List[int]] = Field(
        default=None,
        description='List of equality field IDs',
        alias='equality-ids',
    )
809+
810+
632811
class CreateNamespaceRequest(BaseModel):
633812
namespace: Namespace
634813
properties: Optional[Dict[str, str]] = Field(
@@ -668,6 +847,47 @@ class StatisticsFile(BaseModel):
668847
blob_metadata: List[BlobMetadata] = Field(..., alias='blob-metadata')
669848

670849

850+
# Map encoded as two parallel optional lists: integer column ids in ``keys``
# and primitive values in ``values``, paired by position.
class ValueMap(BaseModel):
    keys: Optional[List[IntegerTypeValue]] = Field(
        default=None,
        description='List of integer column ids for each corresponding value',
    )
    values: Optional[List[PrimitiveTypeValue]] = Field(
        default=None,
        description="List of primitive type values, matched to 'keys' by index",
    )
857+
858+
859+
# ContentFile variant whose ``content`` must be the literal 'data'. It adds
# optional per-column metrics: four CountMap fields and two ValueMap bounds.
#
# The descriptions of ``column-sizes`` and ``value-counts`` were shifted by one
# field (sizes described as counts, counts described as null counts); they now
# describe their own field, in line with the Iceberg table spec's data_file
# metrics.
class DataFile(ContentFile):
    content: Literal['data']
    column_sizes: Optional[CountMap] = Field(
        None,
        alias='column-sizes',
        description='Map of column id to total size on disk',
    )
    value_counts: Optional[CountMap] = Field(
        None,
        alias='value-counts',
        description='Map of column id to total count, including null and NaN',
    )
    null_value_counts: Optional[CountMap] = Field(
        None,
        alias='null-value-counts',
        description='Map of column id to null value count',
    )
    nan_value_counts: Optional[CountMap] = Field(
        None,
        alias='nan-value-counts',
        description='Map of column id to number of NaN values in the column',
    )
    lower_bounds: Optional[ValueMap] = Field(
        None,
        alias='lower-bounds',
        description='Map of column id to lower bound primitive type values',
    )
    upper_bounds: Optional[ValueMap] = Field(
        None,
        alias='upper-bounds',
        description='Map of column id to upper bound primitive type values',
    )
889+
890+
671891
class Term(BaseModel):
672892
__root__: Union[Reference, TransformTerm]
673893

0 commit comments

Comments
 (0)