Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests should explicitly check for schema_id #487

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,9 +614,9 @@ def test_add_column(catalog: InMemoryCatalog) -> None:
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 1

transaction = given_table.transaction()
transaction.update_schema().add_column(path="new_column2", field_type=IntegerType(), doc="doc").commit()
Expand All @@ -628,9 +628,9 @@ def test_add_column(catalog: InMemoryCatalog) -> None:
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
NestedField(field_id=5, name="new_column2", field_type=IntegerType(), required=False, doc="doc"),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 2


def test_add_column_with_statement(catalog: InMemoryCatalog) -> None:
Expand All @@ -644,9 +644,9 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None:
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 1

with given_table.transaction() as tx:
tx.update_schema().add_column(path="new_column2", field_type=IntegerType(), doc="doc").commit()
Expand All @@ -657,9 +657,9 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None:
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
NestedField(field_id=5, name="new_column2", field_type=IntegerType(), required=False, doc="doc"),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 2


def test_catalog_repr(catalog: InMemoryCatalog) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_partition_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,9 @@ def test_change_specs_and_schema_transaction(catalog: Catalog) -> None:
NestedField(field_id=2, name='event_ts', field_type=TimestampType(), required=False),
NestedField(field_id=3, name='str', field_type=StringType(), required=False),
NestedField(field_id=4, name='col_string', field_type=StringType(), required=False),
schema_id=1,
identifier_field_ids=[],
)
assert table.schema().schema_id == 1


@pytest.mark.integration
Expand Down
1 change: 0 additions & 1 deletion tests/integration/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def create_table(catalog: Catalog) -> Table:
NestedField(field_id=2, name="int", field_type=IntegerType(), required=True),
NestedField(field_id=3, name="bool", field_type=BooleanType(), required=False),
NestedField(field_id=4, name="datetime", field_type=TimestampType(), required=False),
schema_id=1,
)

return catalog.create_table(identifier=TABLE_NAME, schema=schema)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_rest_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,19 +361,19 @@ def test_revert_changes(simple_table: Table, table_schema_simple: Schema) -> Non
NestedField(field_id=1, name='foo', field_type=StringType(), required=False),
NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True),
NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False),
schema_id=0,
identifier_field_ids=[2],
),
1: Schema(
NestedField(field_id=1, name='foo', field_type=StringType(), required=False),
NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True),
NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False),
NestedField(field_id=4, name='data', field_type=IntegerType(), required=False),
schema_id=1,
identifier_field_ids=[2],
),
}
assert simple_table.schema().schema_id == 0
assert simple_table.schemas()[0].schema_id == 0
assert simple_table.schemas()[1].schema_id == 1


@pytest.mark.integration
Expand Down
31 changes: 18 additions & 13 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,26 +107,26 @@ def test_schema(table_v2: Table) -> None:
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
)
assert table_v2.schema().schema_id == 1


def test_schemas(table_v2: Table) -> None:
assert table_v2.schemas() == {
0: Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
),
1: Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
),
}
assert table_v2.schemas()[0].schema_id == 0
assert table_v2.schemas()[1].schema_id == 1


def test_spec(table_v2: Table) -> None:
Expand Down Expand Up @@ -266,31 +266,34 @@ def test_table_scan_ref_does_not_exists(table_v2: Table) -> None:

def test_table_scan_projection_full_schema(table_v2: Table) -> None:
scan = table_v2.scan()
assert scan.select("x", "y", "z").projection() == Schema(
projection_schema = scan.select("x", "y", "z").projection()
assert projection_schema == Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
)
assert projection_schema.schema_id == 1


def test_table_scan_projection_single_column(table_v2: Table) -> None:
scan = table_v2.scan()
assert scan.select("y").projection() == Schema(
projection_schema = scan.select("y").projection()
assert projection_schema == Schema(
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
schema_id=1,
identifier_field_ids=[2],
)
assert projection_schema.schema_id == 1


def test_table_scan_projection_single_column_case_sensitive(table_v2: Table) -> None:
scan = table_v2.scan()
assert scan.with_case_sensitive(False).select("Y").projection() == Schema(
projection_schema = scan.with_case_sensitive(False).select("Y").projection()
assert projection_schema == Schema(
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
schema_id=1,
identifier_field_ids=[2],
)
assert projection_schema.schema_id == 1


def test_table_scan_projection_unknown_column(table_v2: Table) -> None:
Expand Down Expand Up @@ -983,20 +986,22 @@ def test_correct_schema() -> None:
)

# Should use the current schema, instead of the one from the snapshot
assert t.scan().projection() == Schema(
projection_schema = t.scan().projection()
assert projection_schema == Schema(
NestedField(field_id=1, name='x', field_type=LongType(), required=True),
NestedField(field_id=2, name='y', field_type=LongType(), required=True),
NestedField(field_id=3, name='z', field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
)
assert projection_schema.schema_id == 1

# When we explicitly filter on the commit, we want to have the schema that's linked to the snapshot
assert t.scan(snapshot_id=123).projection() == Schema(
projection_schema = t.scan(snapshot_id=123).projection()
assert projection_schema == Schema(
NestedField(field_id=1, name='x', field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
)
assert projection_schema.schema_id == 0

with pytest.warns(UserWarning, match="Metadata does not contain schema with id: 10"):
t.scan(snapshot_id=234).projection()
Expand Down
1 change: 0 additions & 1 deletion tests/table/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def test_v1_metadata_parsing_directly(example_table_metadata_v1: Dict[str, Any])
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
)
]
Expand Down