Skip to content

Commit

Permalink
materialize-sql: migrate to JSON type when possible
Browse files Browse the repository at this point in the history
  • Loading branch information
mdibaiee committed Oct 14, 2024
1 parent 917e2c3 commit dea33d0
Show file tree
Hide file tree
Showing 22 changed files with 155 additions and 36 deletions.
16 changes: 12 additions & 4 deletions materialize-bigquery/.snapshots/TestValidateAndApplyMigrations
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Base Initial Constraints:
{"Field":"flow_document","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"The root document must be materialized"}
{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"int64","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"intWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"key","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"All Locations that are part of the collections key are required"}
{"Field":"multiple","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This field is able to be materialized"}
{"Field":"nonScalarValue","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"}
Expand All @@ -14,6 +15,7 @@ Base Initial Constraints:
{"Field":"requiredNumeric","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"scalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"second_root","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Only a single root document projection can be materialized for standard updates"}
{"Field":"stringWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"timeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}

Migratable Changes Before Apply Schema:
Expand All @@ -23,19 +25,21 @@ Migratable Changes Before Apply Schema:
{"Name":"flow_document","Nullable":"NO","Type":"STRING"}
{"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP"}
{"Name":"int64","Nullable":"YES","Type":"INT64"}
{"Name":"intWidenedToJson","Nullable":"YES","Type":"INT64"}
{"Name":"key","Nullable":"NO","Type":"STRING"}
{"Name":"multiple","Nullable":"YES","Type":"JSON"}
{"Name":"nonScalarValue","Nullable":"YES","Type":"STRING"}
{"Name":"numericString","Nullable":"YES","Type":"BIGNUMERIC(38)"}
{"Name":"optional","Nullable":"YES","Type":"STRING"}
{"Name":"requiredNumeric","Nullable":"NO","Type":"BIGNUMERIC(38)"}
{"Name":"scalarValue","Nullable":"NO","Type":"STRING"}
{"Name":"stringWidenedToJson","Nullable":"YES","Type":"STRING"}
{"Name":"timeValue","Nullable":"YES","Type":"STRING"}


Migratable Changes Before Apply Data:
key (STRING), _meta_flow_truncated (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMP), flow_published_at (TIMESTAMP), int64 (INTEGER), multiple (JSON), nonScalarValue (STRING), numericString (BIGNUMERIC), optional (STRING), requiredNumeric (BIGNUMERIC), scalarValue (STRING), timeValue (STRING), flow_document (STRING)
1, false, 2024-01-01, 2024-01-01 01:01:01.111111 +0000 UTC, 2024-09-13 01:01:01 +0000 UTC, 1, <nil>, <nil>, 123/1, <nil>, 456/1, test, 01:01:01, {}
key (STRING), _meta_flow_truncated (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMP), flow_published_at (TIMESTAMP), int64 (INTEGER), intWidenedToJson (INTEGER), multiple (JSON), nonScalarValue (STRING), numericString (BIGNUMERIC), optional (STRING), requiredNumeric (BIGNUMERIC), scalarValue (STRING), stringWidenedToJson (STRING), timeValue (STRING), flow_document (STRING)
1, false, 2024-01-01, 2024-01-01 01:01:01.111111 +0000 UTC, 2024-09-13 01:01:01 +0000 UTC, 1, 999, <nil>, <nil>, 123/1, <nil>, 456/1, test, hello, 01:01:01, {}

Migratable Changes Constraints:
{"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
Expand All @@ -44,6 +48,7 @@ Migratable Changes Constraints:
{"Field":"flow_document","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is the document in the current materialization"}
{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"int64","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"intWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"key","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is a key in the current materialization"}
{"Field":"multiple","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"nonScalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
Expand All @@ -53,6 +58,7 @@ Migratable Changes Constraints:
{"Field":"requiredNumeric","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"scalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"second_root","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize root document projection 'second_root' because field 'flow_document' is already being materialized as the document"}
{"Field":"stringWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"timeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}

Migratable Changes Applied Schema:
Expand All @@ -62,17 +68,19 @@ Migratable Changes Applied Schema:
{"Name":"flow_document","Nullable":"NO","Type":"STRING"}
{"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP"}
{"Name":"int64","Nullable":"YES","Type":"INT64"}
{"Name":"intWidenedToJson","Nullable":"YES","Type":"JSON"}
{"Name":"key","Nullable":"NO","Type":"STRING"}
{"Name":"multiple","Nullable":"YES","Type":"JSON"}
{"Name":"nonScalarValue","Nullable":"YES","Type":"STRING"}
{"Name":"numericString","Nullable":"YES","Type":"STRING"}
{"Name":"optional","Nullable":"YES","Type":"STRING"}
{"Name":"requiredNumeric","Nullable":"YES","Type":"STRING"}
{"Name":"scalarValue","Nullable":"NO","Type":"STRING"}
{"Name":"stringWidenedToJson","Nullable":"YES","Type":"JSON"}
{"Name":"timeValue","Nullable":"YES","Type":"STRING"}


Migratable Changes Applied Data:
key (STRING), _meta_flow_truncated (BOOLEAN), flow_published_at (TIMESTAMP), int64 (INTEGER), multiple (JSON), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), timeValue (STRING), flow_document (STRING), dateValue (STRING), datetimeValue (STRING), numericString (STRING), requiredNumeric (STRING)
1, false, 2024-09-13 01:01:01 +0000 UTC, 1, <nil>, <nil>, <nil>, test, 01:01:01, {}, 2024-01-01, 2024-01-01T01:01:01.111111Z, 123, 456
key (STRING), _meta_flow_truncated (BOOLEAN), flow_published_at (TIMESTAMP), int64 (INTEGER), multiple (JSON), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), timeValue (STRING), flow_document (STRING), dateValue (STRING), datetimeValue (STRING), intWidenedToJson (JSON), numericString (STRING), requiredNumeric (STRING), stringWidenedToJson (JSON)
1, false, 2024-09-13 01:01:01 +0000 UTC, 1, <nil>, <nil>, <nil>, test, 01:01:01, {}, 2024-01-01, 2024-01-01T01:01:01.111111Z, 999, 123, 456, "hello"

5 changes: 5 additions & 0 deletions materialize-bigquery/sqlgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ var bqDialect = func() sql.Dialect {
"float": {sql.NewMigrationSpec([]string{"string"})},
"date": {sql.NewMigrationSpec([]string{"string"})},
"timestamp": {sql.NewMigrationSpec([]string{"string"}, sql.WithCastSQL(datetimeToStringCast))},
"*": {sql.NewMigrationSpec([]string{"json"}, sql.WithCastSQL(toJsonCast))},
},
TableLocatorer: sql.TableLocatorFn(func(path []string) sql.InfoTableLocation {
return sql.InfoTableLocation{
Expand Down Expand Up @@ -135,6 +136,10 @@ func datetimeToStringCast(migration sql.ColumnTypeMigration) string {
return fmt.Sprintf(`FORMAT_TIMESTAMP('%%Y-%%m-%%dT%%H:%%M:%%E*SZ', %s, 'UTC') `, migration.Identifier)
}

// toJsonCast renders the SQL expression used to convert an existing column's
// values when that column is migrated to a JSON column. The expression is the
// migration's column identifier wrapped in a TO_JSON(...) call.
func toJsonCast(migration sql.ColumnTypeMigration) string {
	const castTemplate = `TO_JSON(%s)`

	castExpr := fmt.Sprintf(castTemplate, migration.Identifier)
	return castExpr
}

var (
tplAll = sql.MustParseTemplate(bqDialect, "root", `
{{ define "tempTableName" -}}
Expand Down
16 changes: 12 additions & 4 deletions materialize-databricks/.snapshots/TestValidateAndApplyMigrations
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Base Initial Constraints:
{"Field":"flow_document","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"The root document must be materialized"}
{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"int64","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"intWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"key","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"All Locations that are part of the collections key are required"}
{"Field":"multiple","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This field is able to be materialized"}
{"Field":"nonScalarValue","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"}
Expand All @@ -14,6 +15,7 @@ Base Initial Constraints:
{"Field":"requiredNumeric","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"scalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"second_root","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Only a single root document projection can be materialized for standard updates"}
{"Field":"stringWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}
{"Field":"timeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"}

Migratable Changes Before Apply Schema:
Expand All @@ -23,20 +25,22 @@ Migratable Changes Before Apply Schema:
{"Name":"flow_document","Nullable":"NO","Type":"STRING"}
{"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP"}
{"Name":"int64","Nullable":"YES","Type":"LONG"}
{"Name":"intWidenedToJson","Nullable":"YES","Type":"LONG"}
{"Name":"key","Nullable":"NO","Type":"STRING"}
{"Name":"multiple","Nullable":"YES","Type":"STRING"}
{"Name":"nonScalarValue","Nullable":"YES","Type":"STRING"}
{"Name":"numericString","Nullable":"YES","Type":"DECIMAL"}
{"Name":"optional","Nullable":"YES","Type":"STRING"}
{"Name":"requiredNumeric","Nullable":"NO","Type":"DECIMAL"}
{"Name":"scalarValue","Nullable":"NO","Type":"STRING"}
{"Name":"stringWidenedToJson","Nullable":"YES","Type":"STRING"}
{"Name":"timeValue","Nullable":"YES","Type":"STRING"}


Migratable Changes Before Apply Data:
key (STRING), _meta/flow_truncated (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMP), flow_published_at (TIMESTAMP), int64 (BIGINT), multiple (STRING), nonScalarValue (STRING), numericString (DECIMAL), optional (STRING), requiredNumeric (DECIMAL), scalarValue (STRING), timeValue (STRING), flow_document (STRING)
key (STRING), _meta/flow_truncated (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMP), flow_published_at (TIMESTAMP), int64 (BIGINT), intWidenedToJson (BIGINT), multiple (STRING), nonScalarValue (STRING), numericString (DECIMAL), optional (STRING), requiredNumeric (DECIMAL), scalarValue (STRING), stringWidenedToJson (STRING), timeValue (STRING), flow_document (STRING)

1, false, 2024-01-01 00:00:00 +0000 UTC, 2024-01-01 01:01:01.111111 +0000 UTC, 2024-09-13 01:01:01 +0000 UTC, 1, <nil>, <nil>, 123, <nil>, 456, test, 01:01:01, {}
1, false, 2024-01-01 00:00:00 +0000 UTC, 2024-01-01 01:01:01.111111 +0000 UTC, 2024-09-13 01:01:01 +0000 UTC, 1, 999, <nil>, <nil>, 123, <nil>, 456, test, hello, 01:01:01, {}

Migratable Changes Constraints:
{"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
Expand All @@ -45,6 +49,7 @@ Migratable Changes Constraints:
{"Field":"flow_document","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is the document in the current materialization"}
{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"int64","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"intWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"key","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is a key in the current materialization"}
{"Field":"multiple","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"nonScalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
Expand All @@ -54,6 +59,7 @@ Migratable Changes Constraints:
{"Field":"requiredNumeric","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"scalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"second_root","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize root document projection 'second_root' because field 'flow_document' is already being materialized as the document"}
{"Field":"stringWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}
{"Field":"timeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"}

Migratable Changes Applied Schema:
Expand All @@ -63,18 +69,20 @@ Migratable Changes Applied Schema:
{"Name":"flow_document","Nullable":"NO","Type":"STRING"}
{"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP"}
{"Name":"int64","Nullable":"YES","Type":"LONG"}
{"Name":"intWidenedToJson","Nullable":"YES","Type":"STRING"}
{"Name":"key","Nullable":"NO","Type":"STRING"}
{"Name":"multiple","Nullable":"YES","Type":"STRING"}
{"Name":"nonScalarValue","Nullable":"YES","Type":"STRING"}
{"Name":"numericString","Nullable":"YES","Type":"STRING"}
{"Name":"optional","Nullable":"YES","Type":"STRING"}
{"Name":"requiredNumeric","Nullable":"NO","Type":"STRING"}
{"Name":"scalarValue","Nullable":"NO","Type":"STRING"}
{"Name":"stringWidenedToJson","Nullable":"YES","Type":"STRING"}
{"Name":"timeValue","Nullable":"YES","Type":"STRING"}


Migratable Changes Applied Data:
key (STRING), _meta/flow_truncated (BOOLEAN), flow_published_at (TIMESTAMP), int64 (BIGINT), multiple (STRING), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), timeValue (STRING), flow_document (STRING), dateValue (STRING), datetimeValue (STRING), numericString (STRING), requiredNumeric (STRING)
key (STRING), _meta/flow_truncated (BOOLEAN), flow_published_at (TIMESTAMP), int64 (BIGINT), multiple (STRING), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), stringWidenedToJson (STRING), timeValue (STRING), flow_document (STRING), dateValue (STRING), datetimeValue (STRING), intWidenedToJson (STRING), numericString (STRING), requiredNumeric (STRING)

1, false, 2024-09-13 01:01:01 +0000 UTC, 1, <nil>, <nil>, <nil>, test, 01:01:01, {}, 2024-01-01, 2024-01-01T01:01:01.111111000Z, 123, 456
1, false, 2024-09-13 01:01:01 +0000 UTC, 1, <nil>, <nil>, <nil>, test, hello, 01:01:01, {}, 2024-01-01, 2024-01-01T01:01:01.111111000Z, 999, 123, 456

Loading

0 comments on commit dea33d0

Please sign in to comment.