|
17 | 17 |
|
18 | 18 | from __future__ import annotations
|
19 | 19 |
|
| 20 | +from datetime import date |
20 | 21 | from typing import Any, Dict, List, Literal, Optional, Union
|
| 22 | +from uuid import UUID |
21 | 23 |
|
22 | 24 | from pydantic import BaseModel, Extra, Field
|
23 | 25 |
|
@@ -629,6 +631,183 @@ class PartitionStatisticsFile(BaseModel):
|
629 | 631 | file_size_in_bytes: int = Field(..., alias='file-size-in-bytes')
|
630 | 632 |
|
631 | 633 |
|
# Custom-root model whose payload is a bare boolean.
# The field is required (its default is Ellipsis).
class BooleanTypeValue(BaseModel):
    __root__: bool = Field(default=..., example=True)
| 636 | + |
| 637 | + |
# Custom-root model whose payload is a bare integer.
# No numeric bounds are declared on the field.
class IntegerTypeValue(BaseModel):
    __root__: int = Field(default=..., example=42)
| 640 | + |
| 641 | + |
# Custom-root model for a "long" payload. The annotation is a plain ``int``
# with no declared bounds; the schema example is 2**63 - 1.
class LongTypeValue(BaseModel):
    __root__: int = Field(default=..., example=9_223_372_036_854_775_807)
| 644 | + |
| 645 | + |
# Custom-root model whose payload is a Python ``float`` (required field).
class FloatTypeValue(BaseModel):
    __root__: float = Field(default=..., example=3.14)
| 648 | + |
| 649 | + |
# Custom-root model for a "double" payload; annotated as a Python ``float``,
# the same annotation FloatTypeValue uses.
class DoubleTypeValue(BaseModel):
    __root__: float = Field(default=..., example=123.456)
| 652 | + |
| 653 | + |
# Custom-root model for a decimal carried as text: the payload type is ``str``,
# and the expected textual forms are spelled out in the field description.
class DecimalTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "Decimal type values are serialized as strings. "
            "Decimals with a positive scale serialize as numeric plain text, "
            "while decimals with a negative scale use scientific notation "
            "and the exponent will be equal to the negated scale. "
            "For instance, a decimal with a positive scale is '123.4500', "
            "with zero scale is '2', and with a negative scale is '2E+20'"
        ),
        example="123.4500",
    )
| 660 | + |
| 661 | + |
# Custom-root model whose payload is a plain string (required field).
class StringTypeValue(BaseModel):
    __root__: str = Field(default=..., example="hello")
| 664 | + |
| 665 | + |
# Custom-root model whose payload is parsed into a ``uuid.UUID``.
#
# The string-only constraints (max_length / min_length / regex) are deliberately
# not attached here: pydantic v1 cannot enforce them on a UUID-typed field and
# raises ValueError ("field constraints are set but not enforced") while the
# class is being created, which would make the whole module unimportable.
# Structural validation is performed by the UUID type itself.
#
# NOTE(review): UUID parsing is more lenient than the lowercase-only pattern
# mentioned in the description. If strict lowercase input must be rejected,
# switch the annotation to ``str`` and restore the constraints instead.
class UUIDTypeValue(BaseModel):
    __root__: UUID = Field(
        ...,
        description='UUID type values are serialized as a 36-character lowercase string in standard UUID format as specified by RFC-4122',
        example='eb26bdb1-a1d8-4aa6-990e-da940875492c',
    )
| 675 | + |
| 676 | + |
# Custom-root model whose payload is parsed into a ``datetime.date``.
class DateTypeValue(BaseModel):
    __root__: date = Field(
        default=...,
        description=(
            "Date type values follow the 'YYYY-MM-DD' ISO-8601 standard date format"
        ),
        example="2007-12-03",
    )
| 683 | + |
| 684 | + |
# Custom-root model for a time-of-day carried as text. The payload type is
# ``str``; the expected layout is given only in the description (no pattern
# constraint is declared on the field).
class TimeTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "Time type values follow the 'HH:MM:SS.ssssss' ISO-8601 format "
            "with microsecond precision"
        ),
        example="22:31:08.123456",
    )
| 691 | + |
| 692 | + |
# Custom-root model for a zone-less timestamp carried as text (``str`` payload,
# microsecond layout documented in the description only).
class TimestampTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "Timestamp type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss' "
            "ISO-8601 format with microsecond precision"
        ),
        example="2007-12-03T10:15:30.123456",
    )
| 699 | + |
| 700 | + |
# Custom-root model for a timestamp with UTC offset carried as text (``str``
# payload, microsecond layout documented in the description only).
class TimestampTzTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "TimestampTz type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss+00:00' "
            "ISO-8601 format with microsecond precision, "
            "and a timezone offset (+00:00 for UTC)"
        ),
        example="2007-12-03T10:15:30.123456+00:00",
    )
| 707 | + |
| 708 | + |
# Custom-root model for a zone-less nanosecond timestamp carried as text
# (``str`` payload, layout documented in the description only).
class TimestampNanoTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss' "
            "ISO-8601 format with nanosecond precision"
        ),
        example="2007-12-03T10:15:30.123456789",
    )
| 715 | + |
| 716 | + |
# Custom-root model for a nanosecond timestamp with UTC offset carried as text
# (``str`` payload, layout documented in the description only).
#
# The description names the tz-aware type ("Timestamptz_ns") so that it no
# longer opens with the same type name as the zone-less TimestampNanoTypeValue
# while describing a format that includes an offset.
class TimestampTzNanoTypeValue(BaseModel):
    __root__: str = Field(
        ...,
        description="Timestamptz_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss+00:00' ISO-8601 format with nanosecond precision, and a timezone offset (+00:00 for UTC)",
        example='2007-12-03T10:15:30.123456789+00:00',
    )
| 723 | + |
| 724 | + |
# Custom-root model for fixed-length binary data carried as text. The payload
# type is ``str``; the hex/uppercase convention is stated in the description
# and is not enforced by any declared constraint.
class FixedTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "Fixed length type values are stored and serialized as an uppercase "
            "hexadecimal string preserving the fixed length"
        ),
        example="78797A",
    )
| 731 | + |
| 732 | + |
# Custom-root model for variable-length binary data carried as text. The
# payload type is ``str``; the hex/uppercase convention is stated in the
# description and is not enforced by any declared constraint.
class BinaryTypeValue(BaseModel):
    __root__: str = Field(
        default=...,
        description=(
            "Binary type values are stored and serialized as an uppercase "
            "hexadecimal string"
        ),
        example="78797A",
    )
| 739 | + |
| 740 | + |
# Map encoded as two parallel lists: ``keys`` holds integer column ids and
# ``values`` holds the long value for the key at the same index.
# Both lists are optional and default to None; nothing in this model checks
# that the two lists have equal length.
class CountMap(BaseModel):
    keys: Union[List[IntegerTypeValue], None] = Field(
        default=None,
        description="List of integer column ids for each corresponding value",
    )
    values: Union[List[LongTypeValue], None] = Field(
        default=None,
        description="List of Long values, matched to 'keys' by index",
    )
| 748 | + |
| 749 | + |
# Custom-root model accepting any one of the primitive value wrappers defined
# in this module.
class PrimitiveTypeValue(BaseModel):
    # NOTE(review): pydantic v1 validates Union members left to right and keeps
    # the first member that accepts the input, so the order below is presumably
    # significant (the bool/int/float wrappers precede the str-based ones, and
    # several members share the same ``str`` payload type) - confirm the
    # resulting coercions are intended before reordering or relying on which
    # wrapper a given value ends up in.
    __root__: Union[
        BooleanTypeValue,
        IntegerTypeValue,
        LongTypeValue,
        FloatTypeValue,
        DoubleTypeValue,
        DecimalTypeValue,
        StringTypeValue,
        UUIDTypeValue,
        DateTypeValue,
        TimeTypeValue,
        TimestampTypeValue,
        TimestampTzTypeValue,
        TimestampNanoTypeValue,
        TimestampTzNanoTypeValue,
        FixedTypeValue,
        BinaryTypeValue,
    ]
| 769 | + |
| 770 | + |
# Custom-root model restricting the payload to one of three literal format
# names. The field has no default, so a value must be supplied.
class FileFormat(BaseModel):
    __root__: Literal["avro", "orc", "parquet"]
| 773 | + |
| 774 | + |
# Base model for a content file entry. Field names are snake_case in Python
# and mapped to hyphenated names through ``alias``.
# ``content`` is an unconstrained string here; the subclasses in this module
# (PositionDeleteFile, EqualityDeleteFile, DataFile) narrow it to a Literal.
class ContentFile(BaseModel):
    # Required fields with no extra metadata beyond their alias.
    content: str
    file_path: str = Field(default=..., alias="file-path")
    file_format: FileFormat = Field(default=..., alias="file-format")
    spec_id: int = Field(default=..., alias="spec-id")

    # Optional; note that this field declares no alias.
    partition: Union[List[PrimitiveTypeValue], None] = Field(
        default=None,
        description=(
            "A list of partition field values ordered based on the fields of "
            "the partition spec specified by the `spec-id`"
        ),
        example=[1, "bar"],
    )

    # Required size/count metrics.
    file_size_in_bytes: int = Field(
        default=...,
        alias="file-size-in-bytes",
        description="Total file size in bytes",
    )
    record_count: int = Field(
        default=...,
        alias="record-count",
        description="Number of records in the file",
    )

    # Optional extras, all defaulting to None.
    key_metadata: Union[BinaryTypeValue, None] = Field(
        default=None,
        alias="key-metadata",
        description="Encryption key metadata blob",
    )
    split_offsets: Union[List[int], None] = Field(
        default=None,
        alias="split-offsets",
        description="List of splittable offsets",
    )
    sort_order_id: Union[int, None] = Field(default=None, alias="sort-order-id")
| 798 | + |
| 799 | + |
# ContentFile variant whose ``content`` field must equal the literal
# "position-deletes"; every other field is inherited unchanged.
class PositionDeleteFile(ContentFile):
    content: Literal["position-deletes"]
| 802 | + |
| 803 | + |
# ContentFile variant whose ``content`` field must equal the literal
# "equality-deletes". Adds one optional list of integer field ids.
class EqualityDeleteFile(ContentFile):
    content: Literal["equality-deletes"]
    equality_ids: Union[List[int], None] = Field(
        default=None,
        alias="equality-ids",
        description="List of equality field IDs",
    )
| 809 | + |
| 810 | + |
632 | 811 | class CreateNamespaceRequest(BaseModel):
|
633 | 812 | namespace: Namespace
|
634 | 813 | properties: Optional[Dict[str, str]] = Field(
|
@@ -668,6 +847,47 @@ class StatisticsFile(BaseModel):
|
668 | 847 | blob_metadata: List[BlobMetadata] = Field(..., alias='blob-metadata')
|
669 | 848 |
|
670 | 849 |
|
# Map encoded as two parallel lists: ``keys`` holds integer column ids and
# ``values`` holds the primitive value for the key at the same index.
# Both lists are optional and default to None; nothing in this model checks
# that the two lists have equal length.
class ValueMap(BaseModel):
    keys: Union[List[IntegerTypeValue], None] = Field(
        default=None,
        description="List of integer column ids for each corresponding value",
    )
    values: Union[List[PrimitiveTypeValue], None] = Field(
        default=None,
        description="List of primitive type values, matched to 'keys' by index",
    )
| 857 | + |
| 858 | + |
# ContentFile variant whose ``content`` field must equal the literal "data".
# Adds optional per-column metrics: four CountMap fields (sizes and counts)
# and two ValueMap fields (lower/upper bounds). All default to None.
#
# The descriptions of ``column-sizes`` and ``value-counts`` are corrected here:
# they were shifted by one field, so ``column-sizes`` described a value count
# and ``value-counts`` repeated the ``null-value-counts`` text.
class DataFile(ContentFile):
    content: Literal['data']
    column_sizes: Optional[CountMap] = Field(
        None,
        alias='column-sizes',
        description='Map of column id to total size on disk',
    )
    value_counts: Optional[CountMap] = Field(
        None,
        alias='value-counts',
        description='Map of column id to total count, including null and NaN',
    )
    null_value_counts: Optional[CountMap] = Field(
        None,
        alias='null-value-counts',
        description='Map of column id to null value count',
    )
    nan_value_counts: Optional[CountMap] = Field(
        None,
        alias='nan-value-counts',
        description='Map of column id to number of NaN values in the column',
    )
    lower_bounds: Optional[ValueMap] = Field(
        None,
        alias='lower-bounds',
        description='Map of column id to lower bound primitive type values',
    )
    upper_bounds: Optional[ValueMap] = Field(
        None,
        alias='upper-bounds',
        description='Map of column id to upper bound primitive type values',
    )
| 889 | + |
| 890 | + |
671 | 891 | class Term(BaseModel):
|
672 | 892 | __root__: Union[Reference, TransformTerm]
|
673 | 893 |
|
|
0 commit comments