Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
314 commits
Select commit Hold shift + click to select a range
5e78425
Python: Add Flake8 for simple checks (#5073)
Fokko Jun 17, 2022
bb3fc33
Flink: FLIP-27 source enumerator (#4986)
stevenzwu Jun 17, 2022
86afe8f
Puffin: Inline PuffinWriter.writeFully (#5088)
findepi Jun 20, 2022
9ab2ecd
Open-api: RenameTableRequest props are required (#5093)
Fokko Jun 20, 2022
fff6680
Python: Add py.typed file for annotations (#5091)
Fokko Jun 20, 2022
412ab0e
Include actual class name in class mismatch exception (#5089)
findepi Jun 20, 2022
1b21e1a
Fix type of fields list in Puffin (#5087)
findepi Jun 20, 2022
21342a7
Python: Add Avro read path (#4920)
Fokko Jun 20, 2022
f9727dc
Flink: Port #4986, FLIP-27 source enumerator to 1.14 module (#5078)
stevenzwu Jun 20, 2022
8472db3
Core: Add Javadoc for LoadTableResponse, fix error prone warnings (#5…
kbendick Jun 20, 2022
be7e5c6
Core: Add serialization tests for CreateTableRequest (#5052)
kbendick Jun 20, 2022
5416dc7
Python: Bump pre-commit versions (#5074)
Fokko Jun 20, 2022
6b1ec30
AWS: add skip name validation to isValidIdentifier (#5081)
xingfanx Jun 20, 2022
04ed929
Python: Replace vars with cached_property decorator (#5068)
Fokko Jun 21, 2022
255f011
Core: Send format version for create table in REST catalog (#5084)
bryanck Jun 21, 2022
1090aa7
Python: Renable pylint (#5066)
Fokko Jun 21, 2022
a8102b4
Core: Check for valid identifiers in REST catalog (#5107)
bryanck Jun 21, 2022
97c906e
Hive: Print db and table name while acquiring hive meta-store lock (#…
krisdas Jun 22, 2022
69fde49
Core: Metadata table queries fail if a partition column was reused in…
szlta Jun 22, 2022
4ea5e42
Core: Support FileIO prefix operations (#5096)
danielcweeks Jun 22, 2022
901ae2e
Core: Remove Usage of Sets.Union in Loop (#5114)
RussellSpitzer Jun 22, 2022
b05494e
Parquet: Fix VectorizedParquetDefinitionLevelReader with direct ByteB…
huaxingao Jun 23, 2022
22e89c7
Nessie: Fix drop/rename table with TableReference in identifier (#5033)
ajantha-bhat Jun 23, 2022
88fc280
Spec: Typo an to a (#5123)
Fokko Jun 23, 2022
d3c39a7
Python: Add flake8 bugbear (#5117)
Fokko Jun 23, 2022
6bd6d9d
Core: Reuse REST client for catalog and table operations (#5125)
danielcweeks Jun 23, 2022
4c5eec9
Spark 3.2: RewriteDataFiles - Escape special characters in table iden…
bijanhoule Jun 23, 2022
65620e2
Docs: Change docs layout for new iceberg-theme (#5115)
samredai Jun 23, 2022
4734b7d
Flink: Add option to limit the number of snapshots in a planning oper…
chenjunjiedada Jun 24, 2022
3a1217e
ORC: Upgrade to 1.7.5 (#5070)
williamhyun Jun 24, 2022
a143309
Python: Add flake8-comprehensions (#5130)
Fokko Jun 24, 2022
ef0b085
Docs: Use more specific hidden partitioning anchor link (#5128)
a49a Jun 24, 2022
840bf21
Flink: FLIP-27 Iceberg source and builder (#5109)
stevenzwu Jun 24, 2022
85e2c84
Spark: Add compute stats to scan builder also (#5136)
bryanck Jun 27, 2022
8e2aa72
Parquet: Release the compressor when closing ParquetWriter (#5126)
hmit Jun 27, 2022
85a9213
Checkstyle: Add rule for %d in Preconditions.checkArgument (#5057)
xrl1 Jun 27, 2022
d4de393
Arrow: Avoid extra dictionary buffer copy (#5137)
bryanck Jun 27, 2022
9cea732
Core: Fix REST field name case strategy (#5133)
rdblue Jun 27, 2022
7b4ad8c
Python: Update dependencies (#5142)
Fokko Jun 27, 2022
7dd7825
Docs: A few small fixes to docs markdown files (#5127)
samredai Jun 27, 2022
6597ea4
Python: Use Pydantic for serialization and deserialization (#5011)
Fokko Jun 27, 2022
a5109a0
Core: Fix partition clustering to produce table sort order (#5131)
rdblue Jun 27, 2022
c373141
Core: Fix tag ancestor snapshot handling (#5034)
rdblue Jun 28, 2022
ec84de7
Build: Print used JDK version (#5138)
nastra Jun 28, 2022
4eb1ac0
Spark: Validate HMS uri in SparkSessionCatalog (#5134)
flyrain Jun 28, 2022
c882d11
Spark: Add __metadata_col for metadata columns when converting types …
singhpk234 Jun 28, 2022
6929af0
Revert "Spark: Use original option keys in SparkCatalog (#4903)"
rdblue Jun 28, 2022
6032350
MR: Clone ANYWHERE location array IcebergSplit (#4984)
findepi Jun 28, 2022
d1c8d23
API: Refactor ScanTask hierarchy (#5077)
aokolnychyi Jun 28, 2022
0177788
Core: Add source snapshot info to Puffin Blob metadata (#5129)
findepi Jun 28, 2022
6793a7d
API: Support composite types in Accessors (#5067)
dimas-b Jun 28, 2022
15692cb
Docs: Add reference to Apache Impala documentation (#5146)
samredai Jun 28, 2022
91964ad
Core: Fix CreateTableRequest to use field names from the REST spec (#…
kbendick Jun 28, 2022
9d81dec
Core: Update CreateTableRequest Javadoc (#5144)
kbendick Jun 28, 2022
6b51d5e
move spark/v3.2 to spark/v3.3
Jun 28, 2022
fc4d6af
make spark 3.3 work
Jun 20, 2022
298c931
copy 3.2 files from 3.3
Jun 28, 2022
a946079
enable ci for 3.2
Jun 20, 2022
9c6c0f9
Docs: Update README.md project description.
rdblue Jun 28, 2022
06eaf41
Spec: Fix typo in REST catalog spec (#5152)
kbendick Jun 28, 2022
be72d1a
Python: Make the VoidTransform a singleton (#5149)
Fokko Jun 28, 2022
207c1f8
Spark: Fix regression from Scan refactor (#5143)
bryanck Jun 28, 2022
05ba796
Core: Test serialization of LoadTableResponse (#5118)
kbendick Jun 29, 2022
828890e
Spark 3.3: Support AS OF syntax in Spark SQL for time travel (#5156)
singhpk234 Jun 29, 2022
3971770
Python: Resolve write/read schemas (#5116)
Fokko Jun 29, 2022
05ea553
Core: Make StreamingDelete public (#5148)
gustavoatt Jun 29, 2022
72f1095
Spark: Port performance fix for Spark 3.2 to 3.3 (#5155)
bryanck Jun 29, 2022
afededf
Spark 3.x: Use original option keys in SparkCatalog (#5044)
rajarshisarkar Jun 29, 2022
68f5294
Docs: Add Flink SQL client -i docs (#5003)
ymZhao1001 Jun 29, 2022
46e30ac
Spec: Fix language in spec ORC type notes (#4976)
szehon-ho Jun 29, 2022
4bc4f89
Nessie: Update to 0.30.0 (#4780)
snazy Jun 29, 2022
3ce6b2c
Core: Avoid snapshot ID collisions (#4747)
ajantha-bhat Jun 29, 2022
eecaa73
Flink: Remove unnecessary switch statement (#4737)
Jun 29, 2022
866a500
AWS: Support overriding endpoint in DynamoDB (#4726)
ebyhr Jun 29, 2022
1270788
API: Fix version exception when registering metadata (#4946)
ajantha-bhat Jun 29, 2022
7044482
Core: Add reference_snapshot_id filter column to all_manifests table …
szehon-ho Jun 29, 2022
30d31fb
Docs: Update Hive doc page with the 4.0.0-alpha-1 features (#5161)
samredai Jun 30, 2022
d52826f
API: Fix typo Canout to Cannot (#5164)
bianqi520 Jun 30, 2022
17ed760
Python: Add truncate transform (#5030)
jun-he Jun 30, 2022
ef7f925
API: Access deleted and added delete files in Snapshot (#5105)
aokolnychyi Jun 30, 2022
248596a
Python: Rename python top-level module pyiceberg, remove src (#5169)
samredai Jun 30, 2022
43e12fd
Parquet: Add bloom filter options to the write path (#5035)
huaxingao Jun 30, 2022
a74d24b
Parquet: Fix bloom filter check for corrupt files (#5172)
huaxingao Jun 30, 2022
097f05f
Python: Move Transforms to Pydantic (#5170)
Fokko Jul 1, 2022
3979279
Spec: Sort-order order-id is mandatory (#5177)
Fokko Jul 1, 2022
fd1a9f1
Arrow: Pad decimal bytes before passing to decimal vector (#5168)
bryanck Jul 1, 2022
a91a08c
API: Add generic FileIO JSON serialization (#5178)
rdblue Jul 3, 2022
cc62c71
Python: Pin bugbear in Python legacy (#5184)
Fokko Jul 3, 2022
5e9b8e3
API: Add a scan for changes (#4870)
aokolnychyi Jul 3, 2022
81083a9
Flink: Support write options in FlinkSink builder (#3998)
hililiwei Jul 3, 2022
5f4f58a
Spark 3.3: Re-enable 2-level Parquet list test (#5179)
singhpk234 Jul 3, 2022
ce6ca69
API: Add java type to composite TypeID enums (#5154)
dimas-b Jul 3, 2022
945e9c6
Docs: Add Flink and Iceberg type compatibility tables (#4865)
wuwenchi Jul 3, 2022
b2e057a
Flink: Fix typo in FlinkSink string (#5176)
a49a Jul 5, 2022
736f71f
Docs: Fix typo in docs (#5202)
samredai Jul 5, 2022
8cd17e2
Core: Fix ErrorProne Warnings (#5200)
nastra Jul 5, 2022
62bd5a5
Arrow: Fix for dictionary encoded fixed length binary decimals (#5198)
bryanck Jul 5, 2022
0b9bb70
Restricting toBranch only for BaseRowDelta
Jul 9, 2022
003b49a
Adding check to see if it's a branch
Jun 13, 2022
f2001ff
Validation check for branch
Jul 2, 2022
306ad7d
Python: Remove typing extension imports (#4917)
Fokko Jun 2, 2022
08ef852
Spark: Provide Procedure for Catalog Register Procedure (#4810)
RussellSpitzer Jun 3, 2022
f966e01
Remove version.txt from master (#4955)
singhpk234 Jun 3, 2022
3e3e50e
Core: Support metadata table loading in the REST catalog. (#4950)
rdblue Jun 3, 2022
5c362dd
Python: Bump pre-commit hooks (#4934)
Fokko Jun 3, 2022
57ceb4d
Use CatalogUtil classloader instead of context classloader for FileIO…
danielcweeks Jun 3, 2022
2ffb831
API: Add expression equivalence testing (#4947)
rdblue Jun 3, 2022
9d41d62
Core: Fix token refresh thread shutdown (#4958)
bryanck Jun 5, 2022
dfcd168
Docs: Fix broken link to expiration properties (#4939)
polaris6 Jun 5, 2022
57c8402
Python: Use struct constant to improve pack/unpack performance (#4929)
chulucninh09 Jun 5, 2022
a8ce718
Python: Add pyupgrade as a pre-commit hook (#4935)
Fokko Jun 5, 2022
5f7e4a4
Spec: Clarify truncate transform for strings is based on code points …
emkornfield Jun 5, 2022
69783cb
Spark: Use original option keys in SparkCatalog (#4903)
rajarshisarkar Jun 5, 2022
ef8a690
Python: Add Poetry build and release instructions (#4844)
Fokko Jun 6, 2022
52cb6c8
Python: Add more tests for schema_conversion (#4972)
Fokko Jun 6, 2022
d3ae6b4
Python: Decimal is actually DecimalType (#4967)
Fokko Jun 6, 2022
73fd66b
API: Fix revapi failure, allow BoudnTerm.isEquivalentTo (#4965)
findepi Jun 6, 2022
03991d3
Python: Disable autouse of fixtures (#4964)
Fokko Jun 6, 2022
b24b602
Spark: Add CommitMetadata class to pass additional snapshot propertie…
CodingCat Jun 6, 2022
a3eb890
Flink: FLIP-27 source enumerator help classes (#4329)
stevenzwu Jun 6, 2022
273c9c2
Spec: Add Puffin, a file format for stats and indexes (#4944)
findepi Jun 6, 2022
152394a
Flink 1.15: Port PR #4329 to add FLIP-27 enumerator classes (#4979)
stevenzwu Jun 7, 2022
6d63289
Spark 3.2: Use SnapshotSummary to deduce latestOffset (#4517)
singhpk234 Jun 7, 2022
241762f
Doc: Fix UnescapedEntity HTML javadoc warning (#4987)
ericlgoodman Jun 7, 2022
442f127
Checkstyle: fix reference of non canonical subclass (#4990)
xingfanx Jun 7, 2022
f9bdade
Core, Spec: Update REST namespace separator to unit separator, 0x1F (…
danielcweeks Jun 8, 2022
ed4c913
Core: Replace unnecessary hash uses with hashCode (#4953)
amogh-jahagirdar Jun 8, 2022
0d02c23
Build: Fix API compatibility Github Action (#4989)
kbendick Jun 8, 2022
4bc37dd
Python: Add Poetry IDEA instructions (#4980)
Fokko Jun 9, 2022
fc68058
Python: Change Python API naming to the spec (#4992)
Fokko Jun 9, 2022
0d9b6ca
Flink: Reduce the scope and duration of holding checkpoint lock in th…
stevenzwu Jun 10, 2022
c1a3985
Core: Resolve environment variables in REST catalog config (#5018)
rdblue Jun 12, 2022
f9684bd
Spark: Fix flaky testOlderThanTimestamp in TestRemoveOrphanFilesActio…
sumeetgajjar Jun 12, 2022
46917ef
Python: Extend the use of the Singleton (#5008)
Fokko Jun 12, 2022
fa60596
Python: Add PartitionSpec (#4717)
dramaticlly Jun 12, 2022
7cd0312
Core: Rename JDBC namespace property column to non-keyword (#5017)
sungwy Jun 12, 2022
3378f61
Docs: Restore Iceberg logo in README (#5016)
MasseGuillaume Jun 12, 2022
6cc98ce
Parquet: Fix NPE in logical type handling (#4999)
ConeyLiu Jun 12, 2022
1f00971
Core: Improve refresh executor shutdown in REST catalog close (#4991)
bryanck Jun 12, 2022
977453a
Parquet: Support row group bloom filters (#4938)
huaxingao Jun 13, 2022
0341974
Docs: Fix copy paste: s/parquet/avro/ (#5020)
Fokko Jun 13, 2022
5d8aaa1
Python: Add identity transform (#4908)
jun-he Jun 13, 2022
4d37d69
Core: Enforce close for in-memory files (#5019)
findepi Jun 14, 2022
5c9225e
Build: Avoid running engine tests on LICENSE and NOTICE update (#5022)
singhpk234 Jun 14, 2022
974e5ee
Core: Update ExpireSnapshots impl for branching and tagging (#4578)
amogh-jahagirdar Jun 14, 2022
a133885
Nessie: Properly close all resources in NessieCatalog (#5047)
nastra Jun 15, 2022
62ac4b0
Core: Add reader and writer for Puffin index and stats format (#4537)
findepi Jun 15, 2022
a495360
AWS: add skip name validation config for glue namespace and table (#5…
xingfanx Jun 15, 2022
50020a6
Python: Don't use a metaclass for the Singleton (#5055)
Fokko Jun 15, 2022
e7c8022
Core: In HadoopTableOperations, replace Util.getFs call with getFileS…
chakradhar2 Jun 16, 2022
2adad78
Python: Add Flake8 for simple checks (#5073)
Fokko Jun 17, 2022
c2be8cc
Flink: FLIP-27 source enumerator (#4986)
stevenzwu Jun 17, 2022
c99dc55
Puffin: Inline PuffinWriter.writeFully (#5088)
findepi Jun 20, 2022
92c784a
Open-api: RenameTableRequest props are required (#5093)
Fokko Jun 20, 2022
087abc0
Python: Add py.typed file for annotations (#5091)
Fokko Jun 20, 2022
a568475
Include actual class name in class mismatch exception (#5089)
findepi Jun 20, 2022
1e6d0ba
Fix type of fields list in Puffin (#5087)
findepi Jun 20, 2022
d26c498
Python: Add Avro read path (#4920)
Fokko Jun 20, 2022
6d2424e
Flink: Port #4986, FLIP-27 source enumerator to 1.14 module (#5078)
stevenzwu Jun 20, 2022
b6ad99d
Core: Add Javadoc for LoadTableResponse, fix error prone warnings (#5…
kbendick Jun 20, 2022
1cac35d
Core: Add serialization tests for CreateTableRequest (#5052)
kbendick Jun 20, 2022
b62d1b9
Python: Bump pre-commit versions (#5074)
Fokko Jun 20, 2022
a7216e1
AWS: add skip name validation to isValidIdentifier (#5081)
xingfanx Jun 20, 2022
c72d5f8
Python: Replace vars with cached_property decorator (#5068)
Fokko Jun 21, 2022
25e51a2
Core: Send format version for create table in REST catalog (#5084)
bryanck Jun 21, 2022
a3620de
Python: Renable pylint (#5066)
Fokko Jun 21, 2022
47f6cb8
Core: Check for valid identifiers in REST catalog (#5107)
bryanck Jun 21, 2022
ceb181d
Hive: Print db and table name while acquiring hive meta-store lock (#…
krisdas Jun 22, 2022
85eab2d
Core: Metadata table queries fail if a partition column was reused in…
szlta Jun 22, 2022
90c0ddb
Core: Support FileIO prefix operations (#5096)
danielcweeks Jun 22, 2022
2e18c1a
Core: Remove Usage of Sets.Union in Loop (#5114)
RussellSpitzer Jun 22, 2022
6420f17
Parquet: Fix VectorizedParquetDefinitionLevelReader with direct ByteB…
huaxingao Jun 23, 2022
37f0013
Nessie: Fix drop/rename table with TableReference in identifier (#5033)
ajantha-bhat Jun 23, 2022
bc0c652
Spec: Typo an to a (#5123)
Fokko Jun 23, 2022
fd10183
Python: Add flake8 bugbear (#5117)
Fokko Jun 23, 2022
fbf3975
Core: Reuse REST client for catalog and table operations (#5125)
danielcweeks Jun 23, 2022
364e5f8
Spark 3.2: RewriteDataFiles - Escape special characters in table iden…
bijanhoule Jun 23, 2022
ca6f701
Docs: Change docs layout for new iceberg-theme (#5115)
samredai Jun 23, 2022
effcb4d
Flink: Add option to limit the number of snapshots in a planning oper…
chenjunjiedada Jun 24, 2022
3b3494e
ORC: Upgrade to 1.7.5 (#5070)
williamhyun Jun 24, 2022
4cdb331
Python: Add flake8-comprehensions (#5130)
Fokko Jun 24, 2022
7ff043e
Docs: Use more specific hidden partitioning anchor link (#5128)
a49a Jun 24, 2022
56375f8
Flink: FLIP-27 Iceberg source and builder (#5109)
stevenzwu Jun 24, 2022
aa7c817
Spark: Add compute stats to scan builder also (#5136)
bryanck Jun 27, 2022
bb4112a
Parquet: Release the compressor when closing ParquetWriter (#5126)
hmit Jun 27, 2022
a595d7b
Checkstyle: Add rule for %d in Preconditions.checkArgument (#5057)
xrl1 Jun 27, 2022
bb437a1
Arrow: Avoid extra dictionary buffer copy (#5137)
bryanck Jun 27, 2022
361a58a
Core: Fix REST field name case strategy (#5133)
rdblue Jun 27, 2022
6540d3e
Python: Update dependencies (#5142)
Fokko Jun 27, 2022
24a6032
Docs: A few small fixes to docs markdown files (#5127)
samredai Jun 27, 2022
6720de5
Python: Use Pydantic for serialization and deserialization (#5011)
Fokko Jun 27, 2022
086c197
Core: Fix partition clustering to produce table sort order (#5131)
rdblue Jun 27, 2022
5d693a8
Core: Fix tag ancestor snapshot handling (#5034)
rdblue Jun 28, 2022
89b0404
Build: Print used JDK version (#5138)
nastra Jun 28, 2022
7691f2a
Spark: Validate HMS uri in SparkSessionCatalog (#5134)
flyrain Jun 28, 2022
ee89e31
Spark: Add __metadata_col for metadata columns when converting types …
singhpk234 Jun 28, 2022
cb17a30
Revert "Spark: Use original option keys in SparkCatalog (#4903)"
rdblue Jun 28, 2022
35555c7
MR: Clone ANYWHERE location array IcebergSplit (#4984)
findepi Jun 28, 2022
ae4db79
API: Refactor ScanTask hierarchy (#5077)
aokolnychyi Jun 28, 2022
bcc634f
Core: Add source snapshot info to Puffin Blob metadata (#5129)
findepi Jun 28, 2022
0c5b4d3
API: Support composite types in Accessors (#5067)
dimas-b Jun 28, 2022
b72d415
Docs: Add reference to Apache Impala documentation (#5146)
samredai Jun 28, 2022
c9fe86d
Core: Fix CreateTableRequest to use field names from the REST spec (#…
kbendick Jun 28, 2022
1313a10
Core: Update CreateTableRequest Javadoc (#5144)
kbendick Jun 28, 2022
1144857
move spark/v3.2 to spark/v3.3
Jun 28, 2022
74a2a3f
make spark 3.3 work
Jun 20, 2022
21f3146
copy 3.2 files from 3.3
Jun 28, 2022
e8d5748
enable ci for 3.2
Jun 20, 2022
3af2fc9
Docs: Update README.md project description.
rdblue Jun 28, 2022
938fad9
Spec: Fix typo in REST catalog spec (#5152)
kbendick Jun 28, 2022
ece4af0
Python: Make the VoidTransform a singleton (#5149)
Fokko Jun 28, 2022
fd6b298
Spark: Fix regression from Scan refactor (#5143)
bryanck Jun 28, 2022
fb82817
Core: Test serialization of LoadTableResponse (#5118)
kbendick Jun 29, 2022
cb5495f
Spark 3.3: Support AS OF syntax in Spark SQL for time travel (#5156)
singhpk234 Jun 29, 2022
468dc07
Python: Resolve write/read schemas (#5116)
Fokko Jun 29, 2022
35ea584
Core: Make StreamingDelete public (#5148)
gustavoatt Jun 29, 2022
54768fb
Spark: Port performance fix for Spark 3.2 to 3.3 (#5155)
bryanck Jun 29, 2022
cecfb5d
Spark 3.x: Use original option keys in SparkCatalog (#5044)
rajarshisarkar Jun 29, 2022
b3815ed
Docs: Add Flink SQL client -i docs (#5003)
ymZhao1001 Jun 29, 2022
47877ac
Spec: Fix language in spec ORC type notes (#4976)
szehon-ho Jun 29, 2022
83cc53f
Nessie: Update to 0.30.0 (#4780)
snazy Jun 29, 2022
09ea8e9
Core: Avoid snapshot ID collisions (#4747)
ajantha-bhat Jun 29, 2022
6098862
Flink: Remove unnecessary switch statement (#4737)
Jun 29, 2022
9a26eb5
AWS: Support overriding endpoint in DynamoDB (#4726)
ebyhr Jun 29, 2022
3f5b65a
API: Fix version exception when registering metadata (#4946)
ajantha-bhat Jun 29, 2022
d36ee69
Core: Add reference_snapshot_id filter column to all_manifests table …
szehon-ho Jun 29, 2022
e39845e
Docs: Update Hive doc page with the 4.0.0-alpha-1 features (#5161)
samredai Jun 30, 2022
183f599
API: Fix typo Canout to Cannot (#5164)
bianqi520 Jun 30, 2022
9c47065
Python: Add truncate transform (#5030)
jun-he Jun 30, 2022
c5f5b07
API: Access deleted and added delete files in Snapshot (#5105)
aokolnychyi Jun 30, 2022
3b8a67f
Python: Rename python top-level module pyiceberg, remove src (#5169)
samredai Jun 30, 2022
12f072f
Parquet: Add bloom filter options to the write path (#5035)
huaxingao Jun 30, 2022
03e289b
Parquet: Fix bloom filter check for corrupt files (#5172)
huaxingao Jun 30, 2022
ff27c62
Python: Move Transforms to Pydantic (#5170)
Fokko Jul 1, 2022
c2ab5b2
Spec: Sort-order order-id is mandatory (#5177)
Fokko Jul 1, 2022
41211fa
Arrow: Pad decimal bytes before passing to decimal vector (#5168)
bryanck Jul 1, 2022
76340b8
API: Add generic FileIO JSON serialization (#5178)
rdblue Jul 3, 2022
8331c77
Python: Pin bugbear in Python legacy (#5184)
Fokko Jul 3, 2022
19bc655
API: Add a scan for changes (#4870)
aokolnychyi Jul 3, 2022
2a289b5
Flink: Support write options in FlinkSink builder (#3998)
hililiwei Jul 3, 2022
6321c64
Spark 3.3: Re-enable 2-level Parquet list test (#5179)
singhpk234 Jul 3, 2022
902b72f
API: Add java type to composite TypeID enums (#5154)
dimas-b Jul 3, 2022
bb19394
Docs: Add Flink and Iceberg type compatibility tables (#4865)
wuwenchi Jul 3, 2022
17708f5
Flink: Fix typo in FlinkSink string (#5176)
a49a Jul 5, 2022
14295aa
Docs: Fix typo in docs (#5202)
samredai Jul 5, 2022
253b6f8
Core: Fix ErrorProne Warnings (#5200)
nastra Jul 5, 2022
2a3b3b7
Arrow: Fix for dictionary encoded fixed length binary decimals (#5198)
bryanck Jul 5, 2022
ca664b2
Restricting toBranch only for BaseRowDelta
Jul 9, 2022
cf49279
spell check
Jul 10, 2022
37d5444
Merge branch 'branch-check-validations' of https://github.com/namrath…
Jul 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api/src/main/java/org/apache/iceberg/SnapshotUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,10 @@ public interface SnapshotUpdate<ThisT> extends PendingUpdate<Snapshot> {
* @return this for method chaining
*/
ThisT scanManifestsWith(ExecutorService executorService);

/**
 * Perform the operations of this update on a particular branch.
 *
 * @param branch name of the SnapshotRef of type branch that this update should target
 * @return this for method chaining
 */
ThisT toBranch(String branch);
}
53 changes: 42 additions & 11 deletions core/src/main/java/org/apache/iceberg/BaseRowDelta.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

package org.apache.iceberg;

import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.CharSequenceSet;
import org.apache.iceberg.util.SnapshotUtil;

class BaseRowDelta extends MergingSnapshotProducer<RowDelta> implements RowDelta {
private Long startingSnapshotId = null; // check all versions by default
Expand Down Expand Up @@ -96,20 +98,49 @@ public RowDelta validateNoConflictingDeleteFiles() {
}

@Override
protected void validate(TableMetadata base) {
if (base.currentSnapshot() != null) {
if (!referencedDataFiles.isEmpty()) {
validateDataFilesExist(
base, startingSnapshotId, referencedDataFiles, !validateDeletes, conflictDetectionFilter);
}
public RowDelta toBranch(String branch) {
Preconditions.checkArgument(branch != null, "branch cannot be null");
if (this.current().ref(branch) == null) {
super.createNewRef(branch);
}

if (validateNewDataFiles) {
validateAddedDataFiles(base, startingSnapshotId, conflictDetectionFilter);
}
Preconditions.checkArgument(this.current().ref(branch).type().equals(SnapshotRefType.BRANCH),
"%s is not a ref to type branch", branch);
setTargetBranch(branch);
return self();
}

if (validateNewDeleteFiles) {
validateNoNewDeleteFiles(base, startingSnapshotId, conflictDetectionFilter);
/**
 * Verifies that the configured starting snapshot is among the snapshots returned by
 * {@link SnapshotUtil#ancestorsOf} for the given branch head.
 *
 * <p>The check is skipped when no starting snapshot has been configured or when {@code current}
 * is null.
 *
 * @param current snapshot whose ancestry is searched
 * @param base table metadata used to look up snapshots by id
 * @throws ValidationException if the starting snapshot is not found in the ancestry
 */
private void checkIfSnapshotIsAnAncestor(Snapshot current, TableMetadata base) {
  // Nothing to verify without both a branch head and a starting snapshot.
  if (current == null || this.startingSnapshotId == null) {
    return;
  }

  boolean foundStartingSnapshot = false;
  for (Snapshot candidate : SnapshotUtil.ancestorsOf(current.snapshotId(), base::snapshot)) {
    // long == Long unboxes the field, so this is a numeric comparison
    if (candidate.snapshotId() == this.startingSnapshotId) {
      foundStartingSnapshot = true;
      break;
    }
  }

  if (!foundStartingSnapshot) {
    throw new ValidationException("Snapshot %s is not an ancestor of branch %s", startingSnapshotId, targetBranch());
  }
}

/**
 * Runs the validations configured on this row delta against the target branch.
 *
 * <p>The branch head is the snapshot referenced by {@code targetBranch()}; when no such ref
 * exists in {@code base}, the table's current snapshot is used instead.
 *
 * @param base table metadata to validate against
 */
@Override
protected void validate(TableMetadata base) {
  Snapshot branchHead;
  if (base.ref(targetBranch()) == null) {
    branchHead = base.currentSnapshot();
  } else {
    branchHead = base.snapshot(base.ref(targetBranch()).snapshotId());
  }

  // The starting snapshot must belong to the history of the branch being written to.
  checkIfSnapshotIsAnAncestor(branchHead, base);

  if (!referencedDataFiles.isEmpty()) {
    validateDataFilesExist(base, startingSnapshotId, referencedDataFiles, !validateDeletes, conflictDetectionFilter);
  }

  if (validateNewDataFiles) {
    validateAddedDataFiles(base, startingSnapshotId, conflictDetectionFilter);
  }

  if (validateNewDeleteFiles) {
    validateNoNewDeleteFiles(base, startingSnapshotId, conflictDetectionFilter);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ protected void failMissingDeletePaths() {
deleteFilterManager.failMissingDeletePaths();
}

/**
 * Creates a branch ref named {@code branch} that points at the table's current snapshot and
 * commits the updated metadata through {@code ops}.
 *
 * @param branch name of the branch ref to create
 * @throws IllegalStateException if the table has no current snapshot to branch from
 */
protected void createNewRef(String branch) {
  TableMetadata metadata = this.current();
  // A branch needs a snapshot to point at; fail with a clear message instead of an NPE.
  if (metadata.currentSnapshot() == null) {
    throw new IllegalStateException(
        String.format("Cannot create branch %s: table has no current snapshot", branch));
  }

  SnapshotRef branchRef = SnapshotRef.branchBuilder(metadata.currentSnapshot().snapshotId()).build();
  TableMetadata.Builder updatedBuilder = TableMetadata.buildFrom(metadata);
  updatedBuilder.setRef(branch, branchRef);
  // NOTE(review): the update is built from this.current() but committed against ops.current();
  // confirm these always refer to the same metadata.
  ops.commit(ops.current(), updatedBuilder.build());
}

/**
* Add a filter to match files to delete. A file will be deleted if all of the rows it contains
* match this or any other filter passed to this method.
Expand Down Expand Up @@ -683,7 +690,8 @@ protected Map<String, String> summary() {

@Override
public List<ManifestFile> apply(TableMetadata base) {
Snapshot current = base.currentSnapshot();
Snapshot current = base.ref(targetBranch()) != null ?
base.snapshot(base.ref(targetBranch()).snapshotId()) : base.currentSnapshot();

// filter any existing manifests
List<ManifestFile> filtered = filterManager.filterManifests(
Expand Down
23 changes: 19 additions & 4 deletions core/src/main/java/org/apache/iceberg/SnapshotProducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ public void accept(String file) {

private ExecutorService workerPool = ThreadPools.getWorkerPool();

private String targetBranch = SnapshotRef.MAIN_BRANCH;

protected SnapshotProducer(TableOperations ops) {
this.ops = ops;
this.base = ops.current();
Expand All @@ -104,6 +106,10 @@ protected SnapshotProducer(TableOperations ops) {

protected abstract ThisT self();

/**
 * Returns the name of the branch this producer commits to.
 *
 * @return the target branch name; {@code SnapshotRef.MAIN_BRANCH} unless changed
 */
protected String targetBranch() {
  return this.targetBranch;
}

@Override
public ThisT stageOnly() {
this.stageOnly = true;
Expand All @@ -116,6 +122,15 @@ public ThisT scanManifestsWith(ExecutorService executorService) {
return self();
}

/**
 * Default implementation that rejects branch targeting; subclasses that support committing to
 * a branch override this method.
 *
 * @param branch name of the branch the caller asked to target
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ThisT toBranch(String branch) {
  throw new UnsupportedOperationException(
      "Performing operations on a branch is only supported for BaseRowDelta");
}

/**
 * Sets the name of the branch this producer commits to.
 *
 * @param branch name of the branch to target
 */
protected void setTargetBranch(String branch) {
  this.targetBranch = branch;
}

/**
 * Returns the executor service held by this producer.
 *
 * @return the worker pool
 */
protected ExecutorService workerPool() {
  return workerPool;
}
Expand Down Expand Up @@ -167,8 +182,8 @@ protected void validate(TableMetadata currentMetadata) {
@Override
public Snapshot apply() {
refresh();
Long parentSnapshotId = base.currentSnapshot() != null ?
base.currentSnapshot().snapshotId() : null;
Long parentSnapshotId = base.ref(targetBranch) != null ? base.ref(targetBranch).snapshotId() : null;

long sequenceNumber = base.nextSequenceNumber();

// run validations from the child operation
Expand Down Expand Up @@ -298,11 +313,11 @@ public void commit() {
TableMetadata.Builder update = TableMetadata.buildFrom(base);
if (base.snapshot(newSnapshot.snapshotId()) != null) {
// this is a rollback operation
update.setBranchSnapshot(newSnapshot.snapshotId(), SnapshotRef.MAIN_BRANCH);
update.setBranchSnapshot(newSnapshot.snapshotId(), targetBranch);
} else if (stageOnly) {
update.addSnapshot(newSnapshot);
} else {
update.setBranchSnapshot(newSnapshot, SnapshotRef.MAIN_BRANCH);
update.setBranchSnapshot(newSnapshot, targetBranch);
}

TableMetadata updated = update.build();
Expand Down
64 changes: 64 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestRowDelta.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.junit.Assert;
import org.junit.Test;
Expand Down Expand Up @@ -1323,4 +1324,67 @@ public void testRowDeltaCaseSensitivity() {
.validateNoConflictingDeleteFiles()
.commit());
}

@Test
public void testBranchValidationsNotValidAncestor() {
  table.newAppend()
      .appendFile(FILE_A)
      .commit();

  // Branch off at the first snapshot, then advance main with a second append.
  long branchPoint = table.currentSnapshot().snapshotId();
  table.manageSnapshots().createBranch("newBranch", branchPoint).commit();

  table.newAppend()
      .appendFile(FILE_B)
      .commit();

  Expression filter = Expressions.alwaysTrue();

  // Validation starts from the latest snapshot on main, which is not an ancestor of the branch,
  // so committing this row delta is expected to fail with a validation exception.
  RowDelta pending = table.newRowDelta()
      .toBranch("newBranch")
      .addDeletes(FILE_A_DELETES)
      .validateFromSnapshot(table.currentSnapshot().snapshotId())
      .conflictDetectionFilter(filter)
      .validateNoConflictingDeleteFiles();

  AssertHelpers.assertThrows("No matching ancestor found", ValidationException.class, () -> pending.commit());
}

@Test
public void testBranchValidationsValidAncestor() {
  table.newAppend()
      .appendFile(FILE_A)
      .commit();

  // Branch off at the first snapshot, then advance main with a second append.
  long branchPoint = table.currentSnapshot().snapshotId();
  table.manageSnapshots().createBranch("newBranch", branchPoint).commit();

  table.newAppend()
      .appendFile(FILE_B)
      .commit();

  Expression filter = Expressions.alwaysTrue();

  // This commit should not result in a validation exception: validation starts from the
  // snapshot the branch was created at, which is an ancestor of the branch.
  table.newRowDelta()
      .toBranch("newBranch")
      .addDeletes(FILE_A_DELETES)
      .validateFromSnapshot(branchPoint)
      .conflictDetectionFilter(filter)
      .validateNoConflictingDeleteFiles()
      .commit();

  List<ManifestFile> branchDataManifests = table.ops().current()
      .snapshot(table.ops().current().ref("newBranch").snapshotId())
      .dataManifests(table.io());
  Assert.assertEquals("branch should have 1 data manifest", 1, Iterables.size(branchDataManifests));

  List<ManifestFile> branchDeleteManifests = table.ops().current()
      .snapshot(table.ops().current().ref("newBranch").snapshotId())
      .deleteManifests(table.io());
  Assert.assertEquals("branch should have 1 delete manifest", 1, Iterables.size(branchDeleteManifests));

  List<ManifestFile> mainDataManifests = table.currentSnapshot().dataManifests(table.io());
  Assert.assertEquals("main branch should have 2 data manifest", 2, Iterables.size(mainDataManifests));
}
}