From 95f8f87b4eaabe4bd436e914513d0e711cdf98e4 Mon Sep 17 00:00:00 2001
From: ahmedabu98
Date: Wed, 1 Mar 2023 21:38:36 +0000
Subject: [PATCH] new cross-language task+configuration with input expansion
 service and python collect marker

---
 .../setup-self-hosted-action/action.yml | 8 +-
 .../workflows/build_playground_frontend.yml | 2 +-
 .../workflows/git_tag_released_version.yml | 2 +-
 .github/workflows/go_tests.yml | 4 +-
 .github/workflows/issue-tagger.yml | 2 +-
 .github/workflows/java_tests.yml | 8 +-
 .github/workflows/label_prs.yml | 2 +-
 .../workflows/playground_deploy_backend.yml | 2 +-
 .github/workflows/pr-bot-new-prs.yml | 2 +-
 .github/workflows/pr-bot-pr-updates.yml | 2 +-
 .../pr-bot-prs-needing-attention.yml | 2 +-
 .../publish_github_release_notes.yml | 4 +-
 .github/workflows/python_tests.yml | 2 +-
 .github/workflows/reportGenerator.yml | 2 +-
 .github/workflows/self-assign.yml | 2 +-
 .github/workflows/stale.yml | 2 +-
 .github/workflows/triaged-on-assign.yml | 2 +-
 .test-infra/dataproc/flink_cluster.sh | 3 +-
 .../jenkins/CommonJobProperties.groovy | 4 +
 .../jenkins/PrecommitJobBuilder.groovy | 18 +-
 .test-infra/jenkins/README.md | 31 +-
 .test-infra/jenkins/job_PostCommit_Go.groovy | 2 +-
 ...t_Python_CrossLanguage_Gcp_Dataflow.groovy | 6 +-
 ...mit_Python_CrossLanguage_Gcp_Direct.groovy | 10 +-
 .../job_PreCommit_Java_Amqp_IO_Direct.groovy | 41 -
 ..._PreCommit_Java_Cassandra_IO_Direct.groovy | 42 -
 .../job_PreCommit_Java_Cdap_IO_Direct.groovy | 44 -
 ...PreCommit_Java_Clickhouse_IO_Direct.groovy | 41 -
 ...b_PreCommit_Java_Debezium_IO_Direct.groovy | 45 -
 ...Commit_Java_ElasticSearch_IO_Direct.groovy | 48 -
 ...b_PreCommit_Java_HCatalog_IO_Direct.groovy | 43 -
 .../job_PreCommit_Java_Hbase_IO_Direct.groovy | 43 -
 .../jenkins/job_PreCommit_Java_IOs.groovy | 146 ++
 ...b_PreCommit_Java_InfluxDb_IO_Direct.groovy | 42 -
 .../job_PreCommit_Java_Kudu_IO_Direct.groovy | 42 -
 .../job_PreCommit_Java_Mqtt_IO_Direct.groovy | 42 -
 .../job_PreCommit_Java_Neo4j_IO_Direct.groovy | 44 -
 ...b_PreCommit_Java_RabbitMq_IO_Direct.groovy | 42 -
 .../job_PreCommit_Java_Redis_IO_Direct.groovy | 42 -
 ...reCommit_Java_Singlestore_IO_Direct.groovy | 43 -
 ..._PreCommit_Java_Snowflake_IO_Direct.groovy | 45 -
 .../job_PreCommit_Java_Solr_IO_Direct.groovy | 42 -
 ...job_PreCommit_Java_Splunk_IO_Direct.groovy | 41 -
 ...job_PreCommit_Java_Thrift_IO_Direct.groovy | 41 -
 .../job_PreCommit_Java_Tika_IO_Direct.groovy | 41 -
 CHANGES.md | 43 +-
 build.gradle.kts | 34 +
 .../beam/gradle/BeamModulePlugin.groovy | 146 +-
 dev-support/docker/Dockerfile | 2 +-
 examples/java/build.gradle | 1 +
 .../beam/examples/complete/AutoComplete.java | 2 +-
 .../examples/complete/TrafficMaxLaneFlow.java | 2 +-
 .../beam/examples/complete/TrafficRoutes.java | 2 +-
 .../examples/complete/game/UserScore.java | 2 +-
 .../kafkatopubsub/avro/AvroDataClass.java | 2 +-
 .../transforms/FormatTransform.java | 2 +-
 .../beam/examples/snippets/Snippets.java | 2 +-
 .../io/gcp/bigquery/BigQueryMyData.java | 2 +-
 .../subprocess/utils/ExecutableFile.java | 2 +-
 .../complete/game/LeaderBoardTest.java | 2 +-
 .../complete/game/StatefulTeamScoreTest.java | 2 +-
 examples/kotlin/build.gradle | 1 +
 .../beam/examples/kotlin/snippets/Snippets.kt | 2 +-
 gradle.properties | 6 +-
 .../main/scripts/build_release_candidate.sh | 3 +-
 release/src/main/scripts/mass_comment.py | 53 +-
 runners/core-construction-java/build.gradle | 2 +
 .../AvroGenericCoderRegistrar.java | 2 +-
 .../AvroGenericCoderTranslator.java | 2 +-
 .../core/construction/Environments.java | 1 +
 .../construction/CoderTranslationTest.java | 2 +-
 .../core/construction/EnvironmentsTest.java | 3 +
 runners/direct-java/build.gradle | 4 +-
 runners/flink/1.16/build.gradle | 34 +
 .../1.16/job-server-container/build.gradle | 26 +
 runners/flink/1.16/job-server/build.gradle | 31 +
 .../flink/ReadSourceStreamingTest.java | 3 +-
 .../beam/runners/flink/ReadSourceTest.java | 3 +-
 .../flink/streaming/GroupByNullKeyTest.java | 3 +-
 .../streaming/TopWikipediaSessionsTest.java | 3 +-
 .../google-cloud-dataflow-java/build.gradle | 2 +
 .../util/AvroCoderCloudObjectTranslator.java | 2 +-
 .../dataflow/util/CloudObjectsTest.java | 2 +-
 .../worker/build.gradle | 1 +
 .../dataflow/worker/AvroByteReader.java | 4 +-
 .../dataflow/worker/WindmillStateReader.java | 15 +-
 .../worker/WindmillStateReaderTest.java | 58 +-
 .../translation/SparkSessionFactory.java | 16 +-
 runners/spark/spark_runner.gradle | 2 +
 .../runners/spark/io/AvroPipelineTest.java | 2 +-
 sdks/go.mod | 97 +-
 sdks/go.sum | 870 ++-----
 sdks/go/examples/wasm/README.md | 6 +-
 sdks/go/examples/wasm/wasm.go | 6 +-
 sdks/go/pkg/beam/core/core.go | 2 +-
 sdks/go/pkg/beam/core/graph/fn.go | 8 +-
 .../go/pkg/beam/core/runtime/exec/datasink.go | 2 +-
 .../go/pkg/beam/core/runtime/xlangx/expand.go | 6 +-
 sdks/go/pkg/beam/io/mongodbio/read.go | 29 +-
 sdks/go/pkg/beam/io/textio/textio.go | 39 +-
 sdks/go/pkg/beam/runners/prism/README.md | 1 +
 .../runners/prism/internal/coders_test.go | 6 +-
 .../prism/internal/config/config_test.go | 2 +-
 .../runners/prism/internal/engine/data.go | 30 +
 .../prism/internal/engine/elementmanager.go | 675 ++++++
 .../internal/engine/elementmanager_test.go | 516 ++++
 .../runners/prism/internal/engine/strategy.go | 50 +
 .../prism/internal/engine/strategy_test.go | 45 +
 .../beam/runners/prism/internal/execute.go | 304 +++
 .../runners/prism/internal/execute_test.go | 417 ++++
 .../runners/prism/internal/handlecombine.go | 209 ++
 .../runners/prism/internal/handlepardo.go | 244 ++
 .../runners/prism/internal/handlerunner.go | 298 +++
 .../prism/internal/jobservices/artifact.go | 81 +
 .../runners/prism/internal/jobservices/job.go | 120 +
 .../prism/internal/jobservices/management.go | 142 ++
 .../prism/internal/jobservices/metrics.go | 2 -
 .../internal/jobservices/metrics_test.go | 3 +-
 .../prism/internal/jobservices/server.go | 82 +
 .../prism/internal/jobservices/server_test.go | 79 +
 .../beam/runners/prism/internal/preprocess.go | 148 ++
 .../runners/prism/internal/preprocess_test.go | 181 ++
 .../runners/prism/internal/separate_test.go | 595 +++++
 .../pkg/beam/runners/prism/internal/stage.go | 400 +++
 .../beam/runners/prism/internal/testdofns.go | 349 +++
 .../runners/prism/internal/testdofns_test.go | 55 +
 .../beam/runners/prism/internal/urns/urns.go | 2 +-
 .../runners/prism/internal/worker/bundle.go | 114 +
 .../prism/internal/worker/bundle_test.go | 52 +
 .../runners/prism/internal/worker/worker.go | 421 ++++
 .../prism/internal/worker/worker_test.go | 281 +++
 sdks/go/pkg/beam/runners/prism/prism.go | 48 +
 .../integration/primitives/checkpointing.go | 2 +-
 .../org/apache/beam/sdk/coders/AvroCoder.java | 5 +
 .../beam/sdk/coders/AvroGenericCoder.java | 10 +-
 .../apache/beam/sdk/coders/DefaultCoder.java | 2 +-
 .../java/org/apache/beam/sdk/io/AvroIO.java | 6 +
 .../beam/sdk/io/AvroSchemaIOProvider.java | 6 +
 .../java/org/apache/beam/sdk/io/AvroSink.java | 10 +-
 .../org/apache/beam/sdk/io/AvroSource.java | 5 +
 .../beam/sdk/io/ConstantAvroDestination.java | 10 +-
 .../beam/sdk/io/DynamicAvroDestinations.java | 6 +
 .../sdk/io/SerializableAvroCodecFactory.java | 7 +
 .../beam/sdk/schemas/AvroRecordSchema.java | 6 +
 .../apache/beam/sdk/schemas/io/Providers.java | 30 +-
 .../AvroPayloadSerializerProvider.java | 8 +
 .../sdk/schemas/utils/AvroByteBuddyUtils.java | 8 +
 .../beam/sdk/schemas/utils/AvroUtils.java | 6 +
 .../apache/beam/sdk/io/TextIOWriteTest.java | 30 +-
 .../beam/sdk/transforms/LatestTest.java | 4 +-
 sdks/java/extensions/avro/build.gradle | 4 +-
 .../avro/coders/DefaultCoderTest.java | 61 +
 .../avro/schemas/SchemaCoderTest.java | 196 ++
 .../avro/schemas/transforms/ConvertTest.java | 147 ++
 .../ExternalSchemaIOTransformRegistrar.java | 60 +-
 sdks/java/extensions/sketching/build.gradle | 1 +
 .../sketching/ApproximateDistinctTest.java | 2 +-
 .../sketching/SketchFrequenciesTest.java | 2 +-
 sdks/java/extensions/sql/build.gradle | 1 +
 .../meta/provider/avro/AvroTableProvider.java | 4 +-
 .../meta/provider/parquet/ParquetTable.java | 2 +-
 .../kafka/BeamKafkaTableAvroTest.java | 4 +-
 .../provider/kafka/KafkaTableProviderIT.java | 2 +-
 .../pubsub/PubsubTableProviderIT.java | 4 +-
 sdks/java/harness/build.gradle | 1 +
 .../beam/fn/harness/FnApiDoFnRunner.java | 257 +-
 .../org/apache/beam/fn/harness/FnHarness.java | 14 +-
 .../harness/control/ProcessBundleHandler.java | 18 +-
 .../data/PCollectionConsumerRegistry.java | 65 +-
 .../beam/fn/harness/debug/DataSampler.java | 125 +
 .../beam/fn/harness/debug/OutputSampler.java | 129 +
 .../beam/fn/harness/debug/package-info.java | 25 +-
 .../control/ProcessBundleHandlerTest.java | 48 +-
 .../data/PCollectionConsumerRegistryTest.java | 60 +
 .../fn/harness/debug/DataSamplerTest.java | 273 +++
 .../fn/harness/debug/OutputSamplerTest.java | 188 ++
 sdks/java/io/amazon-web-services/build.gradle | 1 +
 .../io/aws/dynamodb/DynamoDBIOWriteTest.java | 2 +-
 .../java/io/amazon-web-services2/build.gradle | 1 +
 .../io/aws2/dynamodb/DynamoDBIOWriteTest.java | 2 +-
 sdks/java/io/cdap/build.gradle | 1 +
 .../beam/sdk/io/cdap/TestRowDBWritable.java | 2 +-
 sdks/java/io/file-based-io-tests/build.gradle | 1 +
 .../org/apache/beam/sdk/io/avro/AvroIOIT.java | 4 +-
 .../beam/sdk/io/parquet/ParquetIOIT.java | 2 +-
 .../io/file-schema-transform/build.gradle | 1 +
 ...vroWriteSchemaTransformFormatProvider.java | 6 +-
 ...leWriteSchemaTransformFormatProviders.java | 2 +-
 ...uetWriteSchemaTransformFormatProvider.java | 2 +-
 ...riteSchemaTransformFormatProviderTest.java | 6 +-
 ...riteSchemaTransformFormatProviderTest.java | 2 +-
 .../io/google-cloud-platform/build.gradle | 2 +
 .../AvroGenericRecordToStorageApiProto.java | 2 +-
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 361 ++-
 .../sdk/io/gcp/bigquery/BigQueryOptions.java | 11 +
 .../io/gcp/bigquery/BigQueryQuerySource.java | 2 +-
 .../gcp/bigquery/BigQueryQuerySourceDef.java | 2 +-
 .../io/gcp/bigquery/BigQueryServicesImpl.java | 3 +
 .../io/gcp/bigquery/BigQuerySourceBase.java | 2 +-
 .../io/gcp/bigquery/BigQuerySourceDef.java | 2 +-
 .../bigquery/BigQueryStorageArrowReader.java | 2 +-
 .../bigquery/BigQueryStorageSourceBase.java | 67 +-
 .../BigQueryStorageStreamBundleSource.java | 381 +++
 .../io/gcp/bigquery/BigQueryTableSource.java | 2 +-
 .../gcp/bigquery/BigQueryTableSourceDef.java | 2 +-
 ...geApiDynamicDestinationsGenericRecord.java | 2 +-
 .../bigquery/TableRowToStorageApiProto.java | 39 +-
 ...torageWriteApiSchemaTransformProvider.java | 3 +-
 .../beam/sdk/io/gcp/pubsub/PubsubClient.java | 2 +-
 .../beam/sdk/io/gcp/pubsub/PubsubIO.java | 4 +-
 ...PubsubLiteReadSchemaTransformProvider.java | 2 +-
 ...ubsubLiteWriteSchemaTransformProvider.java | 2 +-
 .../model/ChangeStreamRecordMetadata.java | 2 +-
.../changestreams/model/ChildPartition.java | 2 +- .../model/ChildPartitionsRecord.java | 2 +- .../changestreams/model/ColumnType.java | 2 +- .../changestreams/model/DataChangeRecord.java | 2 +- .../changestreams/model/HeartbeatRecord.java | 2 +- .../gcp/spanner/changestreams/model/Mod.java | 2 +- .../spanner/changestreams/model/ModType.java | 2 +- .../model/PartitionMetadata.java | 2 +- .../spanner/changestreams/model/TypeCode.java | 2 +- .../changestreams/model/ValueCaptureType.java | 2 +- ...vroGenericRecordToStorageApiProtoTest.java | 6 +- .../gcp/bigquery/BigQueryAvroUtilsTest.java | 2 +- .../io/gcp/bigquery/BigQueryIOReadTest.java | 2 +- .../bigquery/BigQueryIOStorageReadTest.java | 2 +- ...StorageReadWithStreamBundleSourceTest.java | 2156 +++++++++++++++++ .../io/gcp/bigquery/BigQueryUtilsTest.java | 2 +- .../TableRowToStorageApiProtoTest.java | 60 +- .../beam/sdk/io/gcp/pubsub/PubsubIOTest.java | 2 +- ...PubsubReadSchemaTransformProviderTest.java | 2 +- .../io/gcp/pubsub/PubsubRowToMessageTest.java | 2 +- ...chemaTransformMessageToRowFactoryTest.java | 2 +- ...ubsubWriteSchemaTransformProviderTest.java | 2 +- .../encoder/TimestampEncodingTest.java | 2 +- sdks/java/io/hadoop-format/build.gradle | 1 + .../beam/sdk/io/hadoop/format/Employee.java | 2 +- .../hadoop/format/HadoopFormatIOReadTest.java | 2 +- .../io/hadoop/format/TestRowDBWritable.java | 2 +- sdks/java/io/influxdb/build.gradle | 1 + .../apache/beam/sdk/io/influxdb/Model.java | 2 +- sdks/java/io/jdbc/build.gradle | 1 + .../sdk/io/jdbc/JdbcSchemaIOProvider.java | 63 +- .../sdk/io/jdbc/JdbcSchemaIOProviderTest.java | 129 + .../beam/sdk/io/jdbc/SchemaUtilTest.java | 2 +- sdks/java/io/kafka/build.gradle | 2 + ...entSchemaRegistryDeserializerProvider.java | 2 +- .../sdk/io/kafka/KafkaCheckpointMark.java | 2 +- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 2 +- .../KafkaReadSchemaTransformProvider.java | 4 +- .../sdk/io/kafka/KafkaUnboundedSource.java | 2 +- .../KafkaWriteSchemaTransformProvider.java | 2 +- ...chemaRegistryDeserializerProviderTest.java | 2 +- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 2 +- sdks/java/io/parquet/build.gradle | 2 + .../apache/beam/sdk/io/parquet/ParquetIO.java | 4 +- .../beam/sdk/io/parquet/ParquetIOTest.java | 4 +- sdks/java/io/snowflake/build.gradle | 1 + .../test/unit/read/SnowflakeIOReadTest.java | 2 +- .../testing/expansion-service/build.gradle | 1 + .../expansion/TestExpansionService.java | 2 +- sdks/java/testing/nexmark/build.gradle | 1 + .../beam/sdk/nexmark/NexmarkLauncher.java | 2 +- .../apache/beam/sdk/nexmark/NexmarkUtils.java | 2 +- sdks/python/apache_beam/coders/coder_impl.pxd | 4 + sdks/python/apache_beam/coders/coder_impl.py | 18 +- sdks/python/apache_beam/coders/coders.py | 19 + .../apache_beam/coders/coders_test_common.py | 1 + sdks/python/apache_beam/coders/row_coder.py | 3 + sdks/python/apache_beam/coders/slow_stream.py | 6 + sdks/python/apache_beam/coders/stream.pxd | 3 + sdks/python/apache_beam/coders/stream.pyx | 16 + sdks/python/apache_beam/coders/stream_test.py | 9 + .../apache_beam/examples/inference/README.md | 53 + .../tensorflow_mnist_with_weights.py | 93 + .../io/external/xlang_jdbcio_it_test.py | 18 + sdks/python/apache_beam/io/fileio.py | 9 +- sdks/python/apache_beam/io/gcp/bigquery.py | 7 +- .../io/gcp/bigquery_write_it_test.py | 8 +- sdks/python/apache_beam/io/jdbc.py | 37 +- sdks/python/apache_beam/io/textio.py | 166 +- sdks/python/apache_beam/io/textio_test.py | 33 + .../ml/inference/tensorflow_inference.py | 29 + .../inference/tensorflow_inference_it_test.py 
| 37 +- .../apache_beam/options/pipeline_options.py | 2 +- sdks/python/apache_beam/pipeline.py | 2 +- .../runners/direct/transform_evaluator.py | 5 +- sdks/python/apache_beam/utils/timestamp.py | 5 + .../apache_beam/utils/timestamp_test.py | 5 +- sdks/python/apache_beam/version.py | 2 +- sdks/python/container/boot.go | 49 +- sdks/python/pytest.ini | 1 + sdks/python/scripts/generate_pydoc.sh | 6 +- sdks/python/setup.py | 5 +- .../python/test-suites/dataflow/common.gradle | 23 + .../test-suites/dataflow/py310/build.gradle | 1 - .../test-suites/dataflow/py37/build.gradle | 1 - .../test-suites/dataflow/py38/build.gradle | 1 - .../test-suites/dataflow/py39/build.gradle | 1 - .../test-suites/dataflow/xlang/common.gradle | 68 - sdks/python/test-suites/direct/common.gradle | 25 + .../test-suites/direct/py310/build.gradle | 1 - .../test-suites/direct/py37/build.gradle | 1 - .../test-suites/direct/py38/build.gradle | 1 - .../test-suites/direct/py39/build.gradle | 1 - .../common.gradle => xlang/build.gradle} | 49 +- sdks/python/tox.ini | 14 +- sdks/typescript/package.json | 2 +- settings.gradle.kts | 5 + .../en/documentation/ml/model-evaluation.md | 85 + .../content/en/documentation/ml/overview.md | 20 +- .../en/documentation/programming-guide.md | 2 +- .../site/content/en/get-started/from-spark.md | 9 +- .../content/en/get-started/quickstart-go.md | 2 +- .../www/site/layouts/case-studies/list.html | 2 +- .../section-menu/en/documentation.html | 7 +- .../www/site/static/images/ml-workflows.svg | 2 +- 318 files changed, 12637 insertions(+), 2308 deletions(-) delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Amqp_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Cassandra_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Cdap_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Clickhouse_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Debezium_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_ElasticSearch_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_HCatalog_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Hbase_IO_Direct.groovy create mode 100644 .test-infra/jenkins/job_PreCommit_Java_IOs.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_InfluxDb_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Kudu_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Mqtt_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Neo4j_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_RabbitMq_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Redis_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Singlestore_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Snowflake_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Solr_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Splunk_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Thrift_IO_Direct.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Java_Tika_IO_Direct.groovy create mode 100644 runners/flink/1.16/build.gradle create mode 100644 runners/flink/1.16/job-server-container/build.gradle create mode 100644 runners/flink/1.16/job-server/build.gradle create mode 100644 
sdks/go/pkg/beam/runners/prism/internal/engine/data.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/engine/strategy.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/engine/strategy_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/execute.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/execute_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/handlecombine.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/handlepardo.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/handlerunner.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/preprocess.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/preprocess_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/separate_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/stage.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/testdofns.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/testdofns_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/worker/bundle_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/worker/worker.go create mode 100644 sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go create mode 100644 sdks/go/pkg/beam/runners/prism/prism.go create mode 100644 sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/DefaultCoderTest.java create mode 100644 sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/SchemaCoderTest.java create mode 100644 sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/transforms/ConvertTest.java create mode 100644 sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java create mode 100644 sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java rename .test-infra/jenkins/job_PreCommit_Java_Jms_IO_Direct.groovy => sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/package-info.java (59%) create mode 100644 sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java create mode 100644 sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java create mode 100644 sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamBundleSource.java create mode 100644 sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadWithStreamBundleSourceTest.java create mode 100644 sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProviderTest.java create mode 100644 sdks/python/apache_beam/examples/inference/tensorflow_mnist_with_weights.py delete mode 100644 sdks/python/test-suites/dataflow/xlang/common.gradle rename 
sdks/python/test-suites/{direct/xlang/common.gradle => xlang/build.gradle} (61%) create mode 100755 website/www/site/content/en/documentation/ml/model-evaluation.md mode change 100644 => 100755 website/www/site/layouts/partials/section-menu/en/documentation.html diff --git a/.github/actions/setup-self-hosted-action/action.yml b/.github/actions/setup-self-hosted-action/action.yml index 27469359e26a..430a7e1d828e 100644 --- a/.github/actions/setup-self-hosted-action/action.yml +++ b/.github/actions/setup-self-hosted-action/action.yml @@ -34,9 +34,9 @@ inputs: required: false description: 'Set as false if does not require java-8 setup' default: 'true' - requires-go-18: + requires-go-19: required: false - description: 'Set as false if does not require go-18 setup' + description: 'Set as false if does not require go-19 setup' default: 'true' runs: @@ -64,7 +64,7 @@ runs: distribution: 'temurin' java-version: 8 - name: Set Go Version - if: ${{ inputs.requires-go-18 == 'true' }} + if: ${{ inputs.requires-go-19 == 'true' }} uses: actions/setup-go@v3 with: - go-version: '1.18.0' + go-version: '1.19.0' diff --git a/.github/workflows/build_playground_frontend.yml b/.github/workflows/build_playground_frontend.yml index d2f64dfddb24..26800b6cfecb 100644 --- a/.github/workflows/build_playground_frontend.yml +++ b/.github/workflows/build_playground_frontend.yml @@ -34,7 +34,7 @@ jobs: name: Build Playground Frontend App runs-on: [self-hosted, ubuntu-20.04] env: - GO_VERSION: 1.18.0 + GO_VERSION: 1.19.6 BEAM_VERSION: 2.40.0 TERRAFORM_VERSION: 1.0.9 FLUTTER_VERSION: 3.3.2 diff --git a/.github/workflows/git_tag_released_version.yml b/.github/workflows/git_tag_released_version.yml index 37f2c390b7fe..871149bd26a1 100644 --- a/.github/workflows/git_tag_released_version.yml +++ b/.github/workflows/git_tag_released_version.yml @@ -32,7 +32,7 @@ on: jobs: generate_tags: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest env: VERSION_PATH: ${{ github.event.inputs.VERSION_TAG }} steps: diff --git a/.github/workflows/go_tests.yml b/.github/workflows/go_tests.yml index 22d613ba99a6..49cbe902a7b3 100644 --- a/.github/workflows/go_tests.yml +++ b/.github/workflows/go_tests.yml @@ -35,7 +35,7 @@ concurrency: cancel-in-progress: true jobs: build: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest name: Go Build steps: - name: Check out code @@ -44,7 +44,7 @@ jobs: fetch-depth: 2 - uses: actions/setup-go@v3 with: - go-version: '1.18' + go-version: '1.19' - name: Delete old coverage run: "cd sdks/go/pkg && rm -rf .coverage || :" - name: Run coverage diff --git a/.github/workflows/issue-tagger.yml b/.github/workflows/issue-tagger.yml index 7dbb4bf2d5d4..39f92d87f788 100644 --- a/.github/workflows/issue-tagger.yml +++ b/.github/workflows/issue-tagger.yml @@ -20,7 +20,7 @@ on: jobs: label: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest permissions: issues: write steps: diff --git a/.github/workflows/java_tests.yml b/.github/workflows/java_tests.yml index d5eda7245218..3a94a6bf9ddf 100644 --- a/.github/workflows/java_tests.yml +++ b/.github/workflows/java_tests.yml @@ -43,7 +43,7 @@ jobs: check_gcp_variables: timeout-minutes: 5 name: "Check GCP variables set" - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest outputs: gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} steps: @@ -66,7 +66,7 @@ jobs: strategy: fail-fast: false matrix: - os: [[self-hosted, ubuntu-20.04], macos-latest, [self-hosted, windows-server-2019]] + os: [ubuntu-latest, 
macos-latest, windows-latest] steps: - name: Check out code uses: actions/checkout@v3 @@ -125,7 +125,7 @@ jobs: strategy: fail-fast: false matrix: - os: [[self-hosted, ubuntu-20.04], macos-latest, [self-hosted, windows-server-2019]] + os: [ubuntu-latest, macos-latest, windows-latest] steps: - name: Check out code uses: actions/checkout@v3 @@ -164,7 +164,7 @@ jobs: strategy: fail-fast: false matrix: - os: [[self-hosted, ubuntu-20.04],[self-hosted, windows-server-2019]] + os: [ubuntu-latest,windows-latest] if: | needs.check_gcp_variables.outputs.gcp-variables-set == 'true' && ( (github.event_name == 'push' || github.event_name == 'schedule') || diff --git a/.github/workflows/label_prs.yml b/.github/workflows/label_prs.yml index 02e2207e39a0..aa04506f2bf3 100644 --- a/.github/workflows/label_prs.yml +++ b/.github/workflows/label_prs.yml @@ -21,7 +21,7 @@ on: [pull_request_target] permissions: read-all jobs: label: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest permissions: contents: read pull-requests: write diff --git a/.github/workflows/playground_deploy_backend.yml b/.github/workflows/playground_deploy_backend.yml index b42db8027785..02a4c7992613 100644 --- a/.github/workflows/playground_deploy_backend.yml +++ b/.github/workflows/playground_deploy_backend.yml @@ -34,7 +34,7 @@ jobs: name: Build Playground Backend App runs-on: ubuntu-latest env: - GO_VERSION: 1.18.0 + GO_VERSION: 1.19.6 BEAM_VERSION: 2.40.0 TERRAFORM_VERSION: 1.0.9 STAND_SUFFIX: '' diff --git a/.github/workflows/pr-bot-new-prs.yml b/.github/workflows/pr-bot-new-prs.yml index b511367e672d..8ba27fbec3dc 100644 --- a/.github/workflows/pr-bot-new-prs.yml +++ b/.github/workflows/pr-bot-new-prs.yml @@ -31,7 +31,7 @@ jobs: statuses: read # Don't run on forks if: github.repository == 'apache/beam' - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Setup Node diff --git a/.github/workflows/pr-bot-pr-updates.yml b/.github/workflows/pr-bot-pr-updates.yml index 01e6a31673cb..d96a11368cb8 100644 --- a/.github/workflows/pr-bot-pr-updates.yml +++ b/.github/workflows/pr-bot-pr-updates.yml @@ -31,7 +31,7 @@ jobs: # Don't run on forks if: github.repository == 'apache/beam' - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest steps: # Pin to master so users can't do anything malicious on their own branch and run it here. 
diff --git a/.github/workflows/pr-bot-prs-needing-attention.yml b/.github/workflows/pr-bot-prs-needing-attention.yml index dd7e47fd4879..e96d3983746b 100644 --- a/.github/workflows/pr-bot-prs-needing-attention.yml +++ b/.github/workflows/pr-bot-prs-needing-attention.yml @@ -31,7 +31,7 @@ jobs: statuses: read # Don't run on forks if: github.repository == 'apache/beam' - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Setup Node diff --git a/.github/workflows/publish_github_release_notes.yml b/.github/workflows/publish_github_release_notes.yml index c8569ab4fc58..246ce690f8b1 100644 --- a/.github/workflows/publish_github_release_notes.yml +++ b/.github/workflows/publish_github_release_notes.yml @@ -31,7 +31,7 @@ permissions: read-all jobs: set-properties: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest outputs: properties: ${{ steps.test-properties.outputs.properties }} steps: @@ -41,7 +41,7 @@ jobs: uses: ./.github/actions/setup-default-test-properties publish_github_release_notes: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest needs: set-properties env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 181a90e1da57..faffbea165fb 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -179,7 +179,7 @@ jobs: - name: Install go uses: actions/setup-go@v3 with: - go-version: '1.18' + go-version: '1.19' - name: Download source from artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/reportGenerator.yml b/.github/workflows/reportGenerator.yml index 97a6056bc4c9..44055cd56310 100644 --- a/.github/workflows/reportGenerator.yml +++ b/.github/workflows/reportGenerator.yml @@ -24,7 +24,7 @@ on: jobs: assign: name: Generate issue report - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Setup Node diff --git a/.github/workflows/self-assign.yml b/.github/workflows/self-assign.yml index 1afe7fbe4301..c6b7cc69ce97 100644 --- a/.github/workflows/self-assign.yml +++ b/.github/workflows/self-assign.yml @@ -23,7 +23,7 @@ jobs: issues: write name: Take or close an issue if: ${{ !github.event.issue.pull_request }} - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest steps: - uses: actions/github-script@v6 with: diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 709c5dddfd23..fa2303a931ec 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -21,7 +21,7 @@ on: permissions: read-all jobs: stale: - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest permissions: issues: write pull-requests: write diff --git a/.github/workflows/triaged-on-assign.yml b/.github/workflows/triaged-on-assign.yml index bebfb9cc5eb6..188b53a86961 100644 --- a/.github/workflows/triaged-on-assign.yml +++ b/.github/workflows/triaged-on-assign.yml @@ -22,7 +22,7 @@ jobs: permissions: issues: write name: Mark issue as triaged when assigned - runs-on: [self-hosted, ubuntu-20.04] + runs-on: ubuntu-latest steps: - run: | ISSUE_NUMBER="$(jq '.issue.number' $GITHUB_EVENT_PATH)" diff --git a/.test-infra/dataproc/flink_cluster.sh b/.test-infra/dataproc/flink_cluster.sh index 645cc3c081f1..b623e890d08f 100755 --- a/.test-infra/dataproc/flink_cluster.sh +++ b/.test-infra/dataproc/flink_cluster.sh @@ -46,8 +46,7 @@ set -Eeuxo pipefail # GCloud properties 
GCLOUD_ZONE="${GCLOUD_ZONE:=us-central1-a}" -# TODO: replace preview once dataproc 2.1 released -DATAPROC_VERSION="${DATAPROC_VERSION:=preview-debian11}" +DATAPROC_VERSION="${DATAPROC_VERSION:=2.1-debian}" GCLOUD_REGION=`echo $GCLOUD_ZONE | sed -E "s/(-[a-z])?$//"` MASTER_NAME="$CLUSTER_NAME-m" diff --git a/.test-infra/jenkins/CommonJobProperties.groovy b/.test-infra/jenkins/CommonJobProperties.groovy index 768373c4d477..24653bcc4baf 100644 --- a/.test-infra/jenkins/CommonJobProperties.groovy +++ b/.test-infra/jenkins/CommonJobProperties.groovy @@ -69,6 +69,10 @@ class CommonJobProperties { extensions { wipeOutWorkspace() relativeTargetDirectory(checkoutDir) + cloneOptions { + shallow() + noTags() + } if (!allowRemotePoll) { disableRemotePoll() } diff --git a/.test-infra/jenkins/PrecommitJobBuilder.groovy b/.test-infra/jenkins/PrecommitJobBuilder.groovy index 2d7ef1eee1f4..844473114de0 100644 --- a/.test-infra/jenkins/PrecommitJobBuilder.groovy +++ b/.test-infra/jenkins/PrecommitJobBuilder.groovy @@ -47,13 +47,27 @@ class PrecommitJobBuilder { /** Whether to trigger on new PR commits. Useful to set to false when testing new jobs. */ boolean commitTriggering = true + /** + * Whether to trigger on cron run. Useful to set jobs that runs tasks covered by + * other test suites but are deemed to triggered on pull request only. + */ + boolean cronTriggering = true + + /** + * Whether to configure defaultPathTriggers. + * Set to false for PreCommit only runs on certain code path change. + */ + boolean defaultPathTriggering = true + /** * Define a set of pre-commit jobs. * * @param additionalCustomization Job DSL closure with additional customization to apply to the job. */ void build(Closure additionalCustomization = {}) { - defineCronJob additionalCustomization + if (cronTriggering) { + defineCronJob additionalCustomization + } if (commitTriggering) { defineCommitJob additionalCustomization } @@ -82,7 +96,7 @@ class PrecommitJobBuilder { '^gradle.bat$', '^settings.gradle.kts$' ] - if (triggerPathPatterns) { + if (defaultPathTriggering && triggerPathPatterns) { triggerPathPatterns.addAll defaultPathTriggers } job.with { diff --git a/.test-infra/jenkins/README.md b/.test-infra/jenkins/README.md index a99d4b0c9bd7..9b01da4b5a90 100644 --- a/.test-infra/jenkins/README.md +++ b/.test-infra/jenkins/README.md @@ -30,15 +30,42 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PreCommit_CommunityMetrics | [commit](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Phrase/) | `Run CommunityMetrics PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Cron) | | beam_PreCommit_Go | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Go_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Go_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Go_Phrase/) | `Run Go PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Go_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Go_Cron) | | beam_PreCommit_Java | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Phrase/) | `Run Java PreCommit` | [![Build 
Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cron) | -| beam_PreCommit_Java_Debezium_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/) | | beam_PreCommit_Java_Examples_Dataflow | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Phrase/) | `Run Java_Examples_Dataflow PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Cron) | | beam_PreCommit_Java_Examples_Dataflow_Java11 | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Phrase/) | `Run Java_Examples_Dataflow_Java11 PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Cron/) | +| beam_PreCommit_Java_Amazon-Web-Services_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct_Cron/) | +| beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct_Cron/) | +| beam_PreCommit_Java_Amqp_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amqp_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amqp_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amqp_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amqp_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Amqp_IO_Direct_Cron/) | +| beam_PreCommit_Java_Azure_IO_Direct | 
[commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Azure_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Azure_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Azure_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Azure_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Azure_IO_Direct_Cron/) | +| beam_PreCommit_Java_Cassandra_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cassandra_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cassandra_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cassandra_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cassandra_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cassandra_IO_Direct_Cron/) | +| beam_PreCommit_Java_Cdap_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cdap_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cdap_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cdap_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cdap_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cdap_IO_Direct_Cron/) | +| beam_PreCommit_Java_Clickhouse_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Clickhouse_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Clickhouse_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Clickhouse_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Clickhouse_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Clickhouse_IO_Direct_Cron/) | +| beam_PreCommit_Java_Debezium_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/) | +| beam_PreCommit_Java_ElasticSearch_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_ElasticSearch_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_ElasticSearch_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_ElasticSearch_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_ElasticSearch_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_ElasticSearch_IO_Direct_Cron/) | | beam_PreCommit_Java_GCP_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_GCP_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_GCP_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_GCP_IO_Direct_Phrase/) | `Run Java_GCP_IO_Direct PreCommit` | [![Build 
Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_GCP_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_GCP_IO_Direct_Cron/) | +| beam_PreCommit_Java_Hadoop_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Hadoop_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Hadoop_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Hadoop_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Hadoop_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Hadoop_IO_Direct_Cron/) | +| beam_PreCommit_Java_HBase_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_HBase_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_HBase_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_HBase_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_HBase_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_HBase_IO_Direct_Cron/) | +| beam_PreCommit_Java_HCatalog_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_HCatalog_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_HCatalog_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_HCatalog_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_HCatalog_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_HCatalog_IO_Direct_Cron/) | +| beam_PreCommit_Java_InfluxDb_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_InfluxDb_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_InfluxDb_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_InfluxDb_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_InfluxDb_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_InfluxDb_IO_Direct_Cron/) | | beam_PreCommit_Java_JDBC_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_JDBC_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_JDBC_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_JDBC_IO_Direct_Phrase/) | `Run Java_JDBC_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_JDBC_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_JDBC_IO_Direct_Cron/) | +| beam_PreCommit_Java_Jms_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Jms_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Jms_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Jms_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Jms_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Jms_IO_Direct_Cron/) | | beam_PreCommit_Java_Kafka_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Cron/), 
[phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Phrase/) | `Run Java_Kafka_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Cron/) | | beam_PreCommit_Java_Kinesis_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Phrase/) | `Run Java_Kinesis_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Cron/) | +| beam_PreCommit_Java_Kudu_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kudu_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kudu_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kudu_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kudu_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kudu_IO_Direct_Cron/) | +| beam_PreCommit_Java_MongoDb_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_MongoDb_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_MongoDb_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_MongoDb_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_MongoDb_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_MongoDb_IO_Direct_Cron/) | +| beam_PreCommit_Java_Mqtt_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Mqtt_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Mqtt_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Mqtt_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Mqtt_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Mqtt_IO_Direct_Cron/) | +| beam_PreCommit_Java_Neo4j_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/) | | beam_PreCommit_Java_Neo4j_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Phrase/) | `Run Java_Neo4j_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/) | +| beam_PreCommit_Java_Parquet_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Parquet_IO_Direct_Commit/), 
[cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Parquet_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Parquet_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Parquet_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Parquet_IO_Direct_Cron/) | +| beam_PreCommit_Java_Pulsar_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Pulsar_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Pulsar_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Pulsar_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Pulsar_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Pulsar_IO_Direct_Cron/) | +| beam_PreCommit_Java_RabbitMq_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_RabbitMq_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_RabbitMq_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_RabbitMq_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_RabbitMq_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_RabbitMq_IO_Direct_Cron/) | +| beam_PreCommit_Java_Redis_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Redis_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Redis_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Redis_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Redis_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Redis_IO_Direct_Cron/) | | beam_PreCommit_Java_SingleStore_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Phrase/) | `Run Java_SingleStore_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Cron/) | +| beam_PreCommit_Java_Snowflake_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Snowflake_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Snowflake_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Snowflake_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Snowflake_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Snowflake_IO_Direct_Cron/) | +| beam_PreCommit_Java_Solr_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Solr_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Solr_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Solr_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build 
Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Solr_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Solr_IO_Direct_Cron/) | +| beam_PreCommit_Java_Splunk_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Splunk_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Splunk_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Splunk_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Splunk_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Splunk_IO_Direct_Cron/) | +| beam_PreCommit_Java_Thrift_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Thrift_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Thrift_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Thrift_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Thrift_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Thrift_IO_Direct_Cron/) | +| beam_PreCommit_Java_Tika_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Tika_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Tika_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Tika_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Tika_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Tika_IO_Direct_Cron/) | +| beam_PreCommit_Java_IOs_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_IOs_Direct_Commit/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_IOs_Direct_Phrase/) | `Run Java_IOs_Direct PreCommit` | | | beam_PreCommit_Portable_Python | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Phrase/) | `Run Portable_Python PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron) | | beam_PreCommit_PythonLint | [commit](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Phrase/) | `Run PythonLint PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Cron) | | beam_PreCommit_Python | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Python_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Python_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Python_Phrase/) | `Run Python PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Python_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Python_Cron) | @@ -156,7 +183,7 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PerformanceTests_Compressed_TextIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT/), 
[hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT_HDFS/) | `Run Java CompressedTextIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT_HDFS) | | beam_PerformanceTests_HadoopFormat | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_HadoopFormat/) | `Run Java HadoopFormatIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_HadoopFormat/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_HadoopFormat) | | beam_PerformanceTests_JDBC | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_JDBC/) | `Run Java JdbcIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_JDBC/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_JDBC) | -| beam_PerformanceTests_KafkaIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/) | `Run Java KafkaIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO) | +| beam_PerformanceTests_KafkaIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/) | `Run Java KafkaIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO) | | beam_PerformanceTests_ManyFiles_TextIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT/), [hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS/) | `Run Java ManyFilesTextIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS) | | beam_PerformanceTests_MongoDBIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_MongoDBIO_IT/) | `Run Java MongoDBIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_MongoDBIO_IT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_MongoDBIO_IT) | | beam_PerformanceTests_ParquetIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT/), [hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT_HDFS/) | `Run Java ParquetIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT_HDFS) | diff --git a/.test-infra/jenkins/job_PostCommit_Go.groovy b/.test-infra/jenkins/job_PostCommit_Go.groovy index bc18493596a9..3d1c6a3b53ff 100644 --- 
a/.test-infra/jenkins/job_PostCommit_Go.groovy +++ b/.test-infra/jenkins/job_PostCommit_Go.groovy @@ -22,7 +22,7 @@ import PostcommitJobBuilder // This is the Go postcommit which runs a gradle build, and the current set // of postcommit tests. PostcommitJobBuilder.postCommitJob('beam_PostCommit_Go', 'Run Go PostCommit', - './gradlew :goPostCommit', this) { + 'Go PostCommit (\"Run Go PostCommit\")', this) { description('Runs Go PostCommit tests against master.') previousNames(/beam_PostCommit_Go_GradleBuild/) diff --git a/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Dataflow.groovy b/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Dataflow.groovy index dff173230cba..899f5bd58f10 100644 --- a/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Dataflow.groovy +++ b/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Dataflow.groovy @@ -24,7 +24,7 @@ import static PythonTestProperties.CROSS_LANGUAGE_VALIDATES_RUNNER_PYTHON_VERSIO // This job runs end-to-end cross language GCP IO tests with DataflowRunner. // Collects tests with the @pytest.mark.uses_gcp_java_expansion_service decorator PostcommitJobBuilder.postCommitJob('beam_PostCommit_Python_Xlang_Gcp_Dataflow', - 'Run Python_Xlang_Gcp_Dataflow PostCommit', 'Dataflow Runner CrossLanguage GCP IOs PythonUsingJava Tests', this) { + 'Run Python_Xlang_Gcp_Dataflow PostCommit', 'Python_Xlang_Gcp_Dataflow (\"Run Python_Xlang_Gcp_Dataflow PostCommit\")', this) { description('Runs end-to-end cross language GCP IO tests on the Dataflow runner.') // Set common parameters. @@ -38,10 +38,10 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Python_Xlang_Gcp_Dataflow', // Gradle goals for this job. steps { CROSS_LANGUAGE_VALIDATES_RUNNER_PYTHON_VERSIONS.each { pythonVersion -> - shell("echo \" Running cross language GCP IO tests with python ${pythonVersion} on DataflowRunner.") + shell("echo \"Running cross language GCP IO tests with Python ${pythonVersion} on DataflowRunner.\"") gradle { rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks(":sdks:python:test-suites:dataflow:py${getVersionSuffix(pythonVersion)}:gcpCrossLanguagePythonUsingJava") + tasks(":sdks:python:test-suites:dataflow:py${pythonVersion.replace('.', '')}:gcpCrossLanguagePythonUsingJava") commonJobProperties.setGradleSwitches(delegate) } } diff --git a/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Direct.groovy b/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Direct.groovy index af6e22fe180b..7896ece11b22 100644 --- a/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Direct.groovy +++ b/.test-infra/jenkins/job_PostCommit_Python_CrossLanguage_Gcp_Direct.groovy @@ -19,11 +19,13 @@ import CommonJobProperties as commonJobProperties import PostcommitJobBuilder +import static PythonTestProperties.CROSS_LANGUAGE_VALIDATES_RUNNER_PYTHON_VERSIONS + // This job runs end-to-end cross language GCP IO tests with DirectRunner. // Collects tests with the @pytest.mark.uses_gcp_java_expansion_service decorator PostcommitJobBuilder.postCommitJob('beam_PostCommit_Python_Xlang_Gcp_Direct', - 'Run Python_Xlang_Gcp_Direct PostCommit', 'Direct Runner CrossLanguage GCP IOs PythonUsingJava Tests', this) { - description('Runs end-to-end cross language GCP IO tests h Direct runner.') + 'Run Python_Xlang_Gcp_Direct PostCommit', 'Python_Xlang_Gcp_Direct (\"Run Python_Xlang_Gcp_Direct PostCommit\")', this) { + description('Runs end-to-end cross language GCP IO tests on the Direct runner.') // Set common parameters. 
commonJobProperties.setTopLevelMainJobProperties(delegate) @@ -36,10 +38,10 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Python_Xlang_Gcp_Direct', // Gradle goals for this job. steps { CROSS_LANGUAGE_VALIDATES_RUNNER_PYTHON_VERSIONS.each { pythonVersion -> - shell("echo \"Running cross language GCP IO tests with python ${pythonVersion} on DirectRunner.\"") + shell("echo \"Running cross language GCP IO tests with Python ${pythonVersion} on DirectRunner.\"") gradle { rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks(":sdks:python:test-suites:direct:py${getVersionSuffix(pythonVersion)}:gcpCrossLanguagePythonUsingJava") + tasks(":sdks:python:test-suites:direct:py${pythonVersion.replace('.', '')}:gcpCrossLanguagePythonUsingJava") commonJobProperties.setGradleSwitches(delegate) } } diff --git a/.test-infra/jenkins/job_PreCommit_Java_Amqp_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Amqp_IO_Direct.groovy deleted file mode 100644 index 07225b2da1a9..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Amqp_IO_Direct.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Amqp_IO_Direct', - gradleTasks: [ - ':sdks:java:io:amqp:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/amqp/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Cassandra_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Cassandra_IO_Direct.groovy deleted file mode 100644 index 2f081e011e53..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Cassandra_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Cassandra_IO_Direct', - gradleTasks: [ - ':sdks:java:io:cassandra:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/cassandra/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Cdap_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Cdap_IO_Direct.groovy deleted file mode 100644 index e6c88011ef06..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Cdap_IO_Direct.groovy +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Cdap_IO_Direct', - gradleTasks: [ - ':sdks:java:io:cdap:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/hadoop-common/.*$', - '^sdks/java/io/hadoop-format/.*$', - '^sdks/java/io/cdap/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Clickhouse_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Clickhouse_IO_Direct.groovy deleted file mode 100644 index 1c5852cdfaea..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Clickhouse_IO_Direct.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Clickhouse_IO_Direct', - gradleTasks: [ - ':sdks:java:io:clickhouse:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/pulsar/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Debezium_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Debezium_IO_Direct.groovy deleted file mode 100644 index ae404325a4fb..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Debezium_IO_Direct.groovy +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Debezium_IO_Direct', - gradleTasks: [ - ':sdks:java:io:debezium:build', - ':sdks:java:io:debezium:expansion-service:build', - ':sdks:java:io:debezium:integrationTest', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^runners/google-cloud-dataflow-java/.*$', - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/debezium/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_ElasticSearch_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_ElasticSearch_IO_Direct.groovy deleted file mode 100644 index ecc733f0f813..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_ElasticSearch_IO_Direct.groovy +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_ElasticSearch_IO_Direct', - gradleTasks: [ - ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-5:build', - ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-6:build', - ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-7:build', - ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-8:build', - ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-common:build', - ':sdks:java:io:elasticsearch:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/elasticsearch/.*$', - '^sdks/java/io/elasticsearch-tests/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_HCatalog_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_HCatalog_IO_Direct.groovy deleted file mode 100644 index a5c4c27a5795..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_HCatalog_IO_Direct.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_HCatalog_IO_Direct', - gradleTasks: [ - ':sdks:java:io:hcatalog:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/hadoop-common/.*$', - '^sdks/java/io/hcatalog/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Hbase_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Hbase_IO_Direct.groovy deleted file mode 100644 index 7c9231c191fd..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Hbase_IO_Direct.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_HBase_IO_Direct', - gradleTasks: [ - ':sdks:java:io:hbase:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/hadoop-common/.*$', - '^sdks/java/io/hbase/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_IOs.groovy b/.test-infra/jenkins/job_PreCommit_Java_IOs.groovy new file mode 100644 index 000000000000..5555ac192e17 --- /dev/null +++ b/.test-infra/jenkins/job_PreCommit_Java_IOs.groovy @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import PrecommitJobBuilder + +// Define a PreCommit job that runs the IO unit tests excluded from the default job_PreCommit_Java.groovy +PrecommitJobBuilder builder = new PrecommitJobBuilder( + scope: this, + nameBase: 'Java_IOs_Direct', + gradleTasks: [ + ':javaioPreCommit', + ], + gradleSwitches: [ + '-PdisableSpotlessCheck=true', + '-PdisableCheckStyle=true' + ], // spotless checked in separate pre-commit + triggerPathPatterns: [ + '^sdks/java/io/common/.*$', + '^sdks/java/core/src/main/.*$', + ], + // disable cron runs because these tasks are covered by the per-IO precommit jobs defined below + cronTriggering: false, + timeoutMins: 120, + ) +builder.build { + publishers { + archiveJunit('**/build/test-results/**/*.xml') + } +} + +// Define a precommit job for each of these IOs; each one runs only on changes to the corresponding module +def ioModules = [ + 'amqp', + 'cassandra', + 'cdap', + 'clickhouse', + 'debezium', + 'elasticsearch', + 'hbase', + 'hcatalog', + 'influxdb', + 'jms', + 'kudu', + 'mqtt', + 'neo4j', + 'rabbitmq', + 'redis', + 'singlestore', + 'snowflake', + 'solr', + 'splunk', + 'thrift', + 'tika' +] + +// Any additional trigger paths besides the module's own path and 'sdks/java/io/common' +def additionalTriggerPaths = [ + cdap: [ + '^sdks/java/io/hadoop-common/.*$', + '^sdks/java/io/hadoop-format/.*$', + ], + elasticsearch: [ + '^sdks/java/io/elasticsearch-tests/.*$', + ], + hbase: [ + '^sdks/java/io/hadoop-common/.*$', + ], + hcatalog: [ + '^sdks/java/io/hadoop-common/.*$', + ], + neo4j: [ + '^sdks/java/testing/test-utils/.*$', + ], + singlestore: [ + '^sdks/java/testing/test-utils/.*$', + ], + snowflake: [ + '^sdks/java/extensions/google-cloud-platform-core/.*$', + '^sdks/java/testing/test-utils/.*$',] +] + +// Any additional tasks besides 'build'. +// Additional :build tasks should be kept in sync with the :javaioPreCommit task in build.gradle.kts, which is triggered on commits to Java core and buildSrc. +// Integration tasks (e.g. :integrationTest) do not need to be added there.
+def additionalTasks = [ + debezium: [ + ':sdks:java:io:debezium:expansion-service:build', + ':sdks:java:io:debezium:integrationTest', + ], + elasticsearch: [ + ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-5:build', + ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-6:build', + ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-7:build', + ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-8:build', + ':sdks:java:io:elasticsearch-tests:elasticsearch-tests-common:build', + ], + neo4j: [ + ':sdks:java:io:kinesis:integrationTest', + ], + snowflake: [ + ':sdks:java:io:snowflake:expansion-service:build', + ], +] + +ioModules.forEach { + def triggerPaths = [ + '^sdks/java/io/' + it + '/.*$', + ] + triggerPaths.addAll(additionalTriggerPaths.get(it, [])) + def tasks = [ + ':sdks:java:io:' + it + ':build' + ] + tasks.addAll(additionalTasks.get(it, [])) + PrecommitJobBuilder builderSingle = new PrecommitJobBuilder( + scope: this, + nameBase: 'Java_' + it.capitalize() + '_IO_Direct', + gradleTasks: tasks, + gradleSwitches: [ + '-PdisableSpotlessCheck=true', + '-PdisableCheckStyle=true' + ], // spotless checked in separate pre-commit + triggerPathPatterns: triggerPaths, + defaultPathTriggering: false, + timeoutMins: 60, + ) + builderSingle.build { + publishers { + archiveJunit('**/build/test-results/**/*.xml') + } + } +} diff --git a/.test-infra/jenkins/job_PreCommit_Java_InfluxDb_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_InfluxDb_IO_Direct.groovy deleted file mode 100644 index 3d962128dab4..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_InfluxDb_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_InfluxDb_IO_Direct', - gradleTasks: [ - ':sdks:java:io:influxdb:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/influxdb/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Kudu_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Kudu_IO_Direct.groovy deleted file mode 100644 index d00b5b04bb7a..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Kudu_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Kudu_IO_Direct', - gradleTasks: [ - ':sdks:java:io:kudu:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/kudu/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Mqtt_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Mqtt_IO_Direct.groovy deleted file mode 100644 index 71baed4af786..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Mqtt_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Mqtt_IO_Direct', - gradleTasks: [ - ':sdks:java:io:mqtt:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/mqtt/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Neo4j_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Neo4j_IO_Direct.groovy deleted file mode 100644 index 0d0374d013b0..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Neo4j_IO_Direct.groovy +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Neo4j_IO_Direct', - gradleTasks: [ - ':sdks:java:io:neo4j:build', - ':sdks:java:io:kinesis:integrationTest', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/testing/test-utils/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/neo4j/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_RabbitMq_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_RabbitMq_IO_Direct.groovy deleted file mode 100644 index aa496d636554..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_RabbitMq_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_RabbitMq_IO_Direct', - gradleTasks: [ - ':sdks:java:io:rabbitmq:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/rabbitmq/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Redis_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Redis_IO_Direct.groovy deleted file mode 100644 index 267e5addc359..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Redis_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Redis_IO_Direct', - gradleTasks: [ - ':sdks:java:io:redis:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/redis/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Singlestore_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Singlestore_IO_Direct.groovy deleted file mode 100644 index 1dfbda362dd7..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Singlestore_IO_Direct.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_SingleStore_IO_Direct', - gradleTasks: [ - ':sdks:java:io:singlestore:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/testing/test-utils/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/singlestore/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Snowflake_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Snowflake_IO_Direct.groovy deleted file mode 100644 index d20c214910d0..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Snowflake_IO_Direct.groovy +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Snowflake_IO_Direct', - gradleTasks: [ - ':sdks:java:io:snowflake:build', - ':sdks:java:io:snowflake:expansion-service:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/extensions/google-cloud-platform-core/.*$', - '^sdks/java/testing/test-utils/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/snowflake/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Solr_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Solr_IO_Direct.groovy deleted file mode 100644 index 0adc99cef634..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Solr_IO_Direct.groovy +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Solr_IO_Direct', - gradleTasks: [ - ':sdks:java:io:solr:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/common/.*$', - '^sdks/java/io/solr/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Splunk_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Splunk_IO_Direct.groovy deleted file mode 100644 index 606d8852a66e..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Splunk_IO_Direct.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Splunk_IO_Direct', - gradleTasks: [ - ':sdks:java:io:splunk:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/splunk/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Thrift_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Thrift_IO_Direct.groovy deleted file mode 100644 index 2d4556642617..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Thrift_IO_Direct.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Thrift_IO_Direct', - gradleTasks: [ - ':sdks:java:io:thrift:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/thrift/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Tika_IO_Direct.groovy b/.test-infra/jenkins/job_PreCommit_Java_Tika_IO_Direct.groovy deleted file mode 100644 index 694ea38fccc4..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Java_Tika_IO_Direct.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Tika_IO_Direct', - gradleTasks: [ - ':sdks:java:io:tika:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/tika/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} diff --git a/CHANGES.md b/CHANGES.md index 252fba0ca044..e29944a092f4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -50,6 +50,39 @@ * ([#X](https://github.com/apache/beam/issues/X)). --> +# [2.47.0] - Unreleased + +## Highlights + +* New highly anticipated feature X added to Python SDK ([#X](https://github.com/apache/beam/issues/X)). +* New highly anticipated feature Y added to Java SDK ([#Y](https://github.com/apache/beam/issues/Y)). + +## I/Os + +* Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* BigQuery Storage Write API is now available in Python SDK via cross-language ([#21961](https://github.com/apache/beam/issues/21961)). + +## New Features / Improvements + +* The Flink runner now supports Flink 1.16.x ([#25046](https://github.com/apache/beam/issues/25046)). +* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). + +## Breaking Changes + +* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). + +## Deprecations + +* X behavior is deprecated and will be removed in X versions ([#X](https://github.com/apache/beam/issues/X)). + +## Bugfixes + +* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). + +## Known Issues + +* ([#X](https://github.com/apache/beam/issues/X)). + # [2.46.0] - Unreleased ## Highlights @@ -60,12 +93,15 @@ container was based upon Debian 11. * RunInference PTransform will accept model paths as SideInputs in Python SDK. ([#24042](https://github.com/apache/beam/issues/24042)) * RunInference supports ONNX runtime in Python SDK ([#22972](https://github.com/apache/beam/issues/22972)) +* Tensorflow Model Handler for RunInference in Python SDK ([#25366](https://github.com/apache/beam/issues/25366)) +* Java SDK modules migrated to use `:sdks:java:extensions:avro` ([#24748](https://github.com/apache/beam/issues/24748)) ## I/Os * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Added in JmsIO a retry policy for failed publications (Java) ([#24971](https://github.com/apache/beam/issues/24971)). * Support for `LZMA` compression/decompression of text files added to the Python SDK ([#25316](https://github.com/apache/beam/issues/25316)) +* Added ReadFrom/WriteTo Csv/Json as top-level transforms to the Python SDK. ## New Features / Improvements @@ -75,6 +111,9 @@ * Add `WatchFilePattern` transform, which can be used as a side input to the RunInference PTransfrom to watch for model updates using a file pattern. 
([#24042](https://github.com/apache/beam/issues/24042)) * Add support for loading TorchScript models with `PytorchModelHandler`. The TorchScript model path can be passed to PytorchModelHandler using `torch_script_model_path=`. ([#25321](https://github.com/apache/beam/pull/25321)) +* The Go SDK now requires Go 1.19 to build. ([#25545](https://github.com/apache/beam/pull/25545)) +* The Go SDK now has an initial native Go implementation of a portable Beam Runner called Prism. ([#24789](https://github.com/apache/beam/pull/24789)) + * For more details and current state see https://github.com/apache/beam/tree/master/sdks/go/pkg/beam/runners/prism. ## Breaking Changes @@ -87,7 +126,9 @@ ## Deprecations -* X behavior is deprecated and will be removed in X versions ([#X](https://github.com/apache/beam/issues/X)). +* Avro-related classes are deprecated in the `beam-sdks-java-core` module and will eventually be removed. Please migrate to the new `beam-sdks-java-extensions-avro` module by importing the classes from the `org.apache.beam.sdk.extensions.avro` package. + To keep migration simple, the relative package path and the whole class hierarchy of the Avro-related classes are preserved in the new module. + For example, import `org.apache.beam.sdk.extensions.avro.coders.AvroCoder` instead of `org.apache.beam.sdk.coders.AvroCoder`. ([#24749](https://github.com/apache/beam/issues/24749)). ## Bugfixes diff --git a/build.gradle.kts b/build.gradle.kts index 8a244e6f7ec1..727f6e1e1a4b 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -242,6 +242,8 @@ tasks.register("javaPreCommit") { dependsOn(":runners:flink:1.14:job-server:build") dependsOn(":runners:flink:1.15:build") dependsOn(":runners:flink:1.15:job-server:build") + dependsOn(":runners:flink:1.16:build") + dependsOn(":runners:flink:1.16:job-server:build") dependsOn(":runners:google-cloud-dataflow-java:build") dependsOn(":runners:google-cloud-dataflow-java:examples-streaming:build") dependsOn(":runners:google-cloud-dataflow-java:examples:build") @@ -263,6 +265,7 @@ tasks.register("javaPreCommit") { dependsOn(":sdks:java:expansion-service:build") dependsOn(":sdks:java:expansion-service:app:build") dependsOn(":sdks:java:extensions:arrow:build") + dependsOn(":sdks:java:extensions:avro:build") dependsOn(":sdks:java:extensions:euphoria:build") dependsOn(":sdks:java:extensions:google-cloud-platform-core:build") dependsOn(":sdks:java:extensions:jackson:build") @@ -315,6 +318,36 @@ tasks.register("javaPreCommit") { dependsOn(":sdks:java:container:java8:docker") } +// A precommit task that builds multiple IOs (except those split out into their own jobs) +tasks.register("javaioPreCommit") { + dependsOn(":sdks:java:io:amqp:build") + dependsOn(":sdks:java:io:cassandra:build") + dependsOn(":sdks:java:io:cdap:build") + dependsOn(":sdks:java:io:clickhouse:build") + dependsOn(":sdks:java:io:debezium:expansion-service:build") + dependsOn(":sdks:java:io:debezium:build") + dependsOn(":sdks:java:io:elasticsearch-tests:elasticsearch-tests-5:build") + dependsOn(":sdks:java:io:elasticsearch-tests:elasticsearch-tests-6:build") + dependsOn(":sdks:java:io:elasticsearch-tests:elasticsearch-tests-7:build") + dependsOn(":sdks:java:io:elasticsearch-tests:elasticsearch-tests-8:build") + dependsOn(":sdks:java:io:elasticsearch-tests:elasticsearch-tests-common:build") + dependsOn(":sdks:java:io:elasticsearch:build") + dependsOn(":sdks:java:io:hbase:build") + dependsOn(":sdks:java:io:hcatalog:build") + dependsOn(":sdks:java:io:influxdb:build") +
dependsOn(":sdks:java:io:jms:build") + dependsOn(":sdks:java:io:kudu:build") + dependsOn(":sdks:java:io:mqtt:build") + dependsOn(":sdks:java:io:neo4j:build") + dependsOn(":sdks:java:io:rabbitmq:build") + dependsOn(":sdks:java:io:redis:build") + dependsOn(":sdks:java:io:singlestore:build") + dependsOn(":sdks:java:io:solr:build") + dependsOn(":sdks:java:io:splunk:build") + dependsOn(":sdks:java:io:thrift:build") + dependsOn(":sdks:java:io:tika:build") +} + tasks.register("sqlPreCommit") { dependsOn(":sdks:java:extensions:sql:runBasicExample") dependsOn(":sdks:java:extensions:sql:runPojoExample") @@ -338,6 +371,7 @@ tasks.register("javaPostCommitSickbay") { dependsOn(":runners:flink:1.13:validatesRunnerSickbay") dependsOn(":runners:flink:1.14:validatesRunnerSickbay") dependsOn(":runners:flink:1.15:validatesRunnerSickbay") + dependsOn(":runners:flink:1.16:validatesRunnerSickbay") dependsOn(":runners:spark:3:job-server:validatesRunnerSickbay") dependsOn(":runners:direct-java:validatesRunnerSickbay") dependsOn(":runners:portability:java:validatesRunnerSickbay") diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 139d95a3ccdf..adc39ed92124 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -318,6 +318,48 @@ class BeamModulePlugin implements Plugin { } } + // A class defining the common properties in a given suite of cross-language tests + // Properties are shared across runners and are used when creating a CrossLanguageUsingJavaExpansionConfiguration object + static class CrossLanguageTaskCommon { + // Used as the task name for cross-language + String name + // The expansion service's project path (required) + String expansionProjectPath + // Collect Python pipeline tests with this marker + String collectMarker + // Job server startup task. + TaskProvider startJobServer + // Job server cleanup task. + TaskProvider cleanupJobServer + } + + // A class defining the configuration for CrossLanguageUsingJavaExpansion. + static class CrossLanguageUsingJavaExpansionConfiguration { + // Task name for cross-language tests using Java expansion. + String name = 'crossLanguageUsingJavaExpansion' + // Python pipeline options to use. + List pythonPipelineOptions = [ + "--runner=PortableRunner", + "--job_endpoint=localhost:8099", + "--environment_cache_millis=10000", + "--experiments=beam_fn_api", + ] + // Additional pytest options + List pytestOptions = [] + // Job server startup task. + TaskProvider startJobServer + // Job server cleanup task. + TaskProvider cleanupJobServer + // Number of parallel test runs. + Integer numParallelTests = 1 + // Whether the pipeline needs --sdk_location option + boolean needsSdkLocation = false + // Project path for the expansion service to start up + String expansionProjectPath + // Collect Python pipeline tests with this marker + String collectMarker + } + // A class defining the configuration for CrossLanguageValidatesRunner. static class CrossLanguageValidatesRunnerConfiguration { // Task name for cross-language validate runner case. @@ -356,10 +398,6 @@ class BeamModulePlugin implements Plugin { String semiPersistDir = "/tmp" // classpath for running tests. 
FileCollection classpath - // Expansion service to start up - String expansionJar - // Custom collect for Python pipeline tests that use Java expansion - String collectTestsWithDecorator = "uses_java_expansion_service" } def isRelease(Project project) { @@ -394,7 +432,7 @@ class BeamModulePlugin implements Plugin { // Automatically use the official release version if we are performing a release // otherwise append '-SNAPSHOT' - project.version = '2.46.0' + project.version = '2.47.0' if (!isRelease(project)) { project.version += '-SNAPSHOT' } @@ -2019,7 +2057,7 @@ class BeamModulePlugin implements Plugin { def goRootDir = "${project.rootDir}/sdks/go" // This sets the whole project Go version. - project.ext.goVersion = "go1.19.3" + project.ext.goVersion = "go1.19.6" // Minor TODO: Figure out if we can pull out the GOCMD env variable after goPrepare script // completion, and avoid this GOBIN substitution. @@ -2357,6 +2395,98 @@ class BeamModulePlugin implements Plugin { } } + /** ***********************************************************************************************/ + // Method to create the createCrossLanguageUsingJavaExpansionTask. + // The method takes CrossLanguageUsingJavaExpansionConfiguration as parameter. + // This method creates a task that runs Python SDK pipeline tests that use Java transforms via an input expansion service + project.ext.createCrossLanguageUsingJavaExpansionTask = { + // This task won't work if the python build file doesn't exist. + if (!project.project(":sdks:python").buildFile.exists()) { + System.err.println 'Python build file not found. Skipping createCrossLanguageUsingJavaExpansionTask.' + return + } + def config = it ? it as CrossLanguageUsingJavaExpansionConfiguration : new CrossLanguageUsingJavaExpansionConfiguration() + + project.evaluationDependsOn(":sdks:python") + project.evaluationDependsOn(config.expansionProjectPath) + project.evaluationDependsOn(":runners:core-construction-java") + project.evaluationDependsOn(":sdks:java:extensions:python") + + // Setting up args to launch the expansion service + def envDir = project.project(":sdks:python").envdir + def pythonDir = project.project(":sdks:python").projectDir + def javaExpansionPort = getRandomPort() + def expansionJar = project.project(config.expansionProjectPath).buildTestExpansionServiceJar.archivePath + def javaClassLookupAllowlistFile = project.project(config.expansionProjectPath).projectDir.getPath() + def expansionServiceOpts = [ + "group_id": project.name, + "java_expansion_service_jar": expansionJar, + "java_port": javaExpansionPort, + "java_expansion_service_allowlist_file": javaClassLookupAllowlistFile, + ] + def serviceArgs = project.project(':sdks:python').mapToArgString(expansionServiceOpts) + def javaContainerSuffix + if (JavaVersion.current() == JavaVersion.VERSION_1_8) { + javaContainerSuffix = 'java8' + } else if (JavaVersion.current() == JavaVersion.VERSION_11) { + javaContainerSuffix = 'java11' + } else if (JavaVersion.current() == JavaVersion.VERSION_17) { + javaContainerSuffix = 'java17' + } else { + String exceptionMessage = "Your Java version is unsupported. You need Java version of 8 or 11 or 17 to get started, but your Java version is: " + JavaVersion.current(); + throw new GradleException(exceptionMessage) + } + + // 1. 
Builds the chosen expansion service jar and launches it + def setupTask = project.tasks.register(config.name+"Setup", Exec) { + dependsOn ':sdks:java:container:'+javaContainerSuffix+':docker' + dependsOn config.expansionProjectPath+':buildTestExpansionServiceJar' + dependsOn ":sdks:python:installGcpTest" + // setup test env + executable 'sh' + args '-c', "$pythonDir/scripts/run_expansion_services.sh stop --group_id ${project.name} && $pythonDir/scripts/run_expansion_services.sh start $serviceArgs" + } + + // 2. Sets up, collects, and runs Python pipeline tests + def sdkLocationOpt = [] + if (config.needsSdkLocation) { + setupTask.configure {dependsOn ':sdks:python:sdist'} + sdkLocationOpt = [ + "--sdk_location=${pythonDir}/build/apache-beam.tar.gz" + ] + } + def beamPythonTestPipelineOptions = [ + "pipeline_opts": config.pythonPipelineOptions + sdkLocationOpt, + "test_opts": config.pytestOptions, + "suite": config.name, + "collect": config.collectMarker, + ] + def cmdArgs = project.project(':sdks:python').mapToArgString(beamPythonTestPipelineOptions) + def pythonTask = project.tasks.register(config.name+"PythonUsingJava", Exec) { + group = "Verification" + description = "Runs Python SDK pipeline tests that use a Java expansion service" + environment "EXPANSION_JAR", expansionJar + environment "EXPANSION_PORT", javaExpansionPort + executable 'sh' + args '-c', ". $envDir/bin/activate && cd $pythonDir && ./scripts/run_integration_test.sh $cmdArgs" + dependsOn setupTask + dependsOn config.startJobServer + } + + // 3. Shuts down the expansion service + def cleanupTask = project.tasks.register(config.name+'Cleanup', Exec) { + // teardown test env + executable 'sh' + args '-c', "$pythonDir/scripts/run_expansion_services.sh stop --group_id ${project.name}" + } + + setupTask.configure {finalizedBy cleanupTask} + config.startJobServer.configure {finalizedBy config.cleanupJobServer} + + cleanupTask.configure{mustRunAfter pythonTask} + config.cleanupJobServer.configure{mustRunAfter pythonTask} + } + /** ***********************************************************************************************/ // Method to create the crossLanguageValidatesRunnerTask. 
@@ -2380,7 +2510,7 @@ class BeamModulePlugin implements Plugin { def pythonDir = project.project(":sdks:python").projectDir def javaPort = getRandomPort() def pythonPort = getRandomPort() - def expansionJar = config.expansionJar ?: project.project(':sdks:java:testing:expansion-service').buildTestExpansionServiceJar.archivePath + def expansionJar = project.project(':sdks:java:testing:expansion-service').buildTestExpansionServiceJar.archivePath def javaClassLookupAllowlistFile = project.project(":sdks:java:testing:expansion-service").projectDir.getPath() + "/src/test/resources/test_expansion_service_allowlist.yaml" def expansionServiceOpts = [ "group_id": project.name, @@ -2480,7 +2610,7 @@ class BeamModulePlugin implements Plugin { "suite": "xlangValidateRunner", ] if (sdk == "Java") { - beamPythonTestPipelineOptions["collect"] = config.collectTestsWithDecorator + beamPythonTestPipelineOptions["collect"] = "uses_java_expansion_service" } else if (sdk == "Python") { beamPythonTestPipelineOptions["collect"] = "uses_python_expansion_service" } else { diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile index 3e17c9c96777..1301baa041f4 100644 --- a/dev-support/docker/Dockerfile +++ b/dev-support/docker/Dockerfile @@ -78,7 +78,7 @@ RUN pip3 install distlib==0.3.1 yapf==0.29.0 pytest ### # Install Go ### -ENV DOWNLOAD_GO_VERSION=1.17.6 +ENV DOWNLOAD_GO_VERSION=1.19.6 RUN wget https://golang.org/dl/go${DOWNLOAD_GO_VERSION}.linux-amd64.tar.gz && \ tar -C /usr/local -xzf go${DOWNLOAD_GO_VERSION}.linux-amd64.tar.gz ENV GOROOT /usr/local/go diff --git a/examples/java/build.gradle b/examples/java/build.gradle index aa51dcfeae85..994001c573f7 100644 --- a/examples/java/build.gradle +++ b/examples/java/build.gradle @@ -56,6 +56,7 @@ dependencies { implementation library.java.vendored_guava_26_0_jre implementation library.java.kafka_clients implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:extensions:google-cloud-platform-core") implementation project(":sdks:java:extensions:python") implementation project(":sdks:java:io:google-cloud-platform") diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java b/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java index 449f19cbf75a..99f509f9f7b8 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java +++ b/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java @@ -39,8 +39,8 @@ import org.apache.beam.examples.common.ExampleUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; import org.apache.beam.sdk.io.gcp.datastore.DatastoreIO; diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java index 7d39bdb5e2b6..d3e5144d5338 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java +++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java @@ -49,8 +49,8 @@ import org.apache.beam.examples.common.ExampleUtils; import 
org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; import org.apache.beam.sdk.options.Default; diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java index d3f66e5ff3c6..4705d461f3c9 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java +++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java @@ -55,8 +55,8 @@ import org.apache.beam.examples.common.ExampleUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; import org.apache.beam.sdk.options.Default; diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/game/UserScore.java b/examples/java/src/main/java/org/apache/beam/examples/complete/game/UserScore.java index b1aedacecfbd..8baa82cad8d9 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/complete/game/UserScore.java +++ b/examples/java/src/main/java/org/apache/beam/examples/complete/game/UserScore.java @@ -23,8 +23,8 @@ import org.apache.avro.reflect.Nullable; import org.apache.beam.examples.complete.game.utils.WriteToText; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/avro/AvroDataClass.java b/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/avro/AvroDataClass.java index 8c8702115f65..f7c01e70c632 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/avro/AvroDataClass.java +++ b/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/avro/AvroDataClass.java @@ -17,8 +17,8 @@ */ package org.apache.beam.examples.complete.kafkatopubsub.avro; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** * Example of AVRO serialization class. 
To configure your AVRO schema, change this class to diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/transforms/FormatTransform.java b/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/transforms/FormatTransform.java index d493c0648180..1dcf9e196221 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/transforms/FormatTransform.java +++ b/examples/java/src/main/java/org/apache/beam/examples/complete/kafkatopubsub/transforms/FormatTransform.java @@ -23,10 +23,10 @@ import org.apache.beam.examples.complete.kafkatopubsub.avro.AvroDataClassKafkaAvroDeserializer; import org.apache.beam.examples.complete.kafkatopubsub.kafka.consumer.SslConsumerFactoryFn; import org.apache.beam.examples.complete.kafkatopubsub.options.KafkaToPubsubOptions; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.NullableCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage; import org.apache.beam.sdk.io.kafka.KafkaIO; diff --git a/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java b/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java index 274f48f11bb7..cf6a968c03be 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java +++ b/examples/java/src/main/java/org/apache/beam/examples/snippets/Snippets.java @@ -45,10 +45,10 @@ import javax.annotation.Nullable; import org.apache.avro.generic.GenericRecord; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.DefaultCoder; import org.apache.beam.sdk.coders.DoubleCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.ml.AnnotateText; import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.FileIO; diff --git a/examples/java/src/main/java/org/apache/beam/examples/snippets/transforms/io/gcp/bigquery/BigQueryMyData.java b/examples/java/src/main/java/org/apache/beam/examples/snippets/transforms/io/gcp/bigquery/BigQueryMyData.java index 5641415d7a4f..03b1b4c41425 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/snippets/transforms/io/gcp/bigquery/BigQueryMyData.java +++ b/examples/java/src/main/java/org/apache/beam/examples/snippets/transforms/io/gcp/bigquery/BigQueryMyData.java @@ -30,8 +30,8 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) diff --git a/examples/java/src/main/java/org/apache/beam/examples/subprocess/utils/ExecutableFile.java b/examples/java/src/main/java/org/apache/beam/examples/subprocess/utils/ExecutableFile.java index 036b3d25ca09..1b157d237429 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/subprocess/utils/ExecutableFile.java +++ b/examples/java/src/main/java/org/apache/beam/examples/subprocess/utils/ExecutableFile.java @@ -18,8 +18,8 @@ package org.apache.beam.examples.subprocess.utils; import org.apache.beam.examples.subprocess.configuration.SubProcessConfiguration; 
-import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** Contains the configuration for the external library. */ @DefaultCoder(AvroCoder.class) diff --git a/examples/java/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java b/examples/java/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java index e48c2471cea7..fc449909e139 100644 --- a/examples/java/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java +++ b/examples/java/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java @@ -24,7 +24,7 @@ import org.apache.beam.examples.complete.game.LeaderBoard.CalculateTeamScores; import org.apache.beam.examples.complete.game.LeaderBoard.CalculateUserScores; import org.apache.beam.examples.complete.game.UserScore.GameActionInfo; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; diff --git a/examples/java/src/test/java/org/apache/beam/examples/complete/game/StatefulTeamScoreTest.java b/examples/java/src/test/java/org/apache/beam/examples/complete/game/StatefulTeamScoreTest.java index 6d4c964d8fbc..d94d74d2a85b 100644 --- a/examples/java/src/test/java/org/apache/beam/examples/complete/game/StatefulTeamScoreTest.java +++ b/examples/java/src/test/java/org/apache/beam/examples/complete/game/StatefulTeamScoreTest.java @@ -19,9 +19,9 @@ import org.apache.beam.examples.complete.game.StatefulTeamScore.UpdateTeamScoreFn; import org.apache.beam.examples.complete.game.UserScore.GameActionInfo; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.testing.TestStream; diff --git a/examples/kotlin/build.gradle b/examples/kotlin/build.gradle index 79a1248712d0..6fb3ef64e282 100644 --- a/examples/kotlin/build.gradle +++ b/examples/kotlin/build.gradle @@ -55,6 +55,7 @@ dependencies { // Add the dependency that sdks:java:core that is marked as provided implementation library.java.hamcrest permitUnusedDeclared library.java.hamcrest + implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:extensions:google-cloud-platform-core") implementation project(":sdks:java:io:google-cloud-platform") implementation library.java.avro diff --git a/examples/kotlin/src/main/java/org/apache/beam/examples/kotlin/snippets/Snippets.kt b/examples/kotlin/src/main/java/org/apache/beam/examples/kotlin/snippets/Snippets.kt index d2e2a5883371..2ba7b3742e16 100644 --- a/examples/kotlin/src/main/java/org/apache/beam/examples/kotlin/snippets/Snippets.kt +++ b/examples/kotlin/src/main/java/org/apache/beam/examples/kotlin/snippets/Snippets.kt @@ -23,7 +23,7 @@ import com.google.api.services.bigquery.model.* import com.google.common.collect.ImmutableList import edu.umd.cs.findbugs.annotations.SuppressFBWarnings import org.apache.beam.sdk.Pipeline -import org.apache.beam.sdk.coders.AvroCoder +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder import org.apache.beam.sdk.coders.DefaultCoder import org.apache.beam.sdk.coders.DoubleCoder import 
org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO diff --git a/gradle.properties b/gradle.properties index 9f65ed82f96a..ce0cfa4a7510 100644 --- a/gradle.properties +++ b/gradle.properties @@ -30,13 +30,13 @@ signing.gnupg.useLegacyGpg=true # buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy. # To build a custom Beam version make sure you change it in both places, see # https://github.com/apache/beam/issues/21302. -version=2.46.0-SNAPSHOT -sdk_version=2.46.0.dev +version=2.47.0-SNAPSHOT +sdk_version=2.47.0.dev javaVersion=1.8 docker_image_default_repo_root=apache docker_image_default_repo_prefix=beam_ -flink_versions=1.12,1.13,1.14,1.15 +flink_versions=1.12,1.13,1.14,1.15,1.16 diff --git a/release/src/main/scripts/build_release_candidate.sh b/release/src/main/scripts/build_release_candidate.sh index 127e83fe0b45..0b78296fe35c 100755 --- a/release/src/main/scripts/build_release_candidate.sh +++ b/release/src/main/scripts/build_release_candidate.sh @@ -377,7 +377,8 @@ if [[ $confirmation = "y" ]]; then cd ${BEAM_ROOT_DIR} RELEASE_COMMIT=$(git rev-list -n 1 "tags/${RC_TAG}") # TODO(https://github.com/apache/beam/issues/20209): Don't hardcode py version in this file. - cd sdks/python && pip install -r build-requirements.txt && tox -e py38-docs + # TODO(https://github.com/apache/beam/issues/25649): Remove intermediate gen_protos step. + cd sdks/python && pip install -r build-requirements.txt && python gen_protos.py && tox -e py38-docs GENERATED_PYDOC=~/${LOCAL_WEBSITE_UPDATE_DIR}/${LOCAL_PYTHON_DOC}/${BEAM_ROOT_DIR}/sdks/python/target/docs/_build rm -rf ${GENERATED_PYDOC}/.doctrees diff --git a/release/src/main/scripts/mass_comment.py b/release/src/main/scripts/mass_comment.py index cb60bf6d49d0..3d56225d90f6 100644 --- a/release/src/main/scripts/mass_comment.py +++ b/release/src/main/scripts/mass_comment.py @@ -17,20 +17,14 @@ """Script for mass-commenting Jenkins test triggers on a Beam PR.""" -import itertools -import os import socket -import sys -import time -import traceback -import re import requests -from datetime import datetime # This list can be found by querying the Jenkins API, see BEAM-13951 COMMENTS_TO_ADD = [ "Run CommunityMetrics PreCommit", "Run Dataflow Runner Nexmark Tests", + "Run Dataflow Runner Tpcds Tests", "Run Dataflow Runner V2 Java 11 Nexmark Tests", "Run Dataflow Runner V2 Java 17 Nexmark Tests", "Run Dataflow Runner V2 Nexmark Tests", @@ -45,6 +39,7 @@ "Run Direct ValidatesRunner", "Run Flink Runner Nexmark Tests", "Run Flink ValidatesRunner Java 11", + "Run Flink Runner Tpcds Tests", "Run Flink ValidatesRunner", "Run Go Flink ValidatesRunner", "Run Go PostCommit", @@ -61,6 +56,7 @@ "Run Java Examples_Flink", "Run Java Examples_Spark", "Run Java Flink PortableValidatesRunner Streaming", + "Run Java Portability examples on Dataflow with Java 11", "Run Java PostCommit", "Run Java PreCommit", "Run Java Samza PortableValidatesRunner", @@ -70,11 +66,44 @@ "Run Java examples on Dataflow Java 11", "Run Java examples on Dataflow Java 17", "Run Java examples on Dataflow with Java 11", + "Run Java_Amazon-Web-Services2_IO_Direct PreCommit", + "Run Java_Amazon-Web-Services_IO_Direct PreCommit", + "Run Java_Amqp_IO_Direct PreCommit", + "Run Java_Azure_IO_Direct PreCommit", + "Run Java_Cassandra_IO_Direct PreCommit", + "Run Java_Cdap_IO_Direct PreCommit", + "Run Java_Clickhouse_IO_Direct PreCommit", + "Run Java_Debezium_IO_Direct PreCommit", + "Run Java_ElasticSearch_IO_Direct PreCommit", "Run Java_Examples_Dataflow PreCommit", "Run 
Java_Examples_Dataflow_Java11 PreCommit", "Run Java_Examples_Dataflow_Java17 PreCommit", + "Run Java_GCP_IO_Direct PreCommit", + "Run Java_HBase_IO_Direct PreCommit", + "Run Java_HCatalog_IO_Direct PreCommit", + "Run Java_Hadoop_IO_Direct PreCommit", + "Run Java_InfluxDb_IO_Direct PreCommit", + "Run Java_JDBC_IO_Direct PreCommit", + "Run Java_Jms_IO_Direct PreCommit", + "Run Java_Kafka_IO_Direct PreCommit", + "Run Java_Kinesis_IO_Direct PreCommit", + "Run Java_Kudu_IO_Direct PreCommit", + "Run Java_MongoDb_IO_Direct PreCommit", + "Run Java_Mqtt_IO_Direct PreCommit", + "Run Java_Neo4j_IO_Direct PreCommit", "Run Java_PVR_Flink_Batch PreCommit", "Run Java_PVR_Flink_Docker PreCommit", + "Run Java_Parquet_IO_Direct PreCommit", + "Run Java_Pulsar_IO_Direct PreCommit", + "Run Java_RabbitMq_IO_Direct PreCommit", + "Run Java_Redis_IO_Direct PreCommit", + "Run Java_SingleStore_IO_Direct PreCommit", + "Run Java_Snowflake_IO_Direct PreCommit", + "Run Java_Solr_IO_Direct PreCommit", + "Run Java_Spark3_Versions PreCommit", + "Run Java_Splunk_IO_Direct PreCommit", + "Run Java_Thrift_IO_Direct PreCommit", + "Run Java_Tika_IO_Direct PreCommit", "Run Javadoc PostCommit", "Run Jpms Dataflow Java 11 PostCommit", "Run Jpms Dataflow Java 17 PostCommit", @@ -82,6 +111,7 @@ "Run Jpms Direct Java 17 PostCommit", "Run Jpms Flink Java 11 PostCommit", "Run Jpms Spark Java 11 PostCommit", + "Run Kotlin_Examples PreCommit", "Run PortableJar_Flink PostCommit", "Run PortableJar_Spark PostCommit", "Run Portable_Python PreCommit", @@ -95,6 +125,7 @@ "Run Python Dataflow V2 ValidatesRunner", "Run Python Dataflow ValidatesContainer", "Run Python Dataflow ValidatesRunner", + "Run Python Direct Runner Nexmark Tests", "Run Python Examples_Dataflow", "Run Python Examples_Direct", "Run Python Examples_Flink", @@ -107,7 +138,13 @@ "Run PythonDocs PreCommit", "Run PythonFormatter PreCommit", "Run PythonLint PreCommit", + "Run Python_Coverage PreCommit", + "Run Python_Dataframes PreCommit", + "Run Python_Examples PreCommit", + "Run Python_Integration PreCommit", "Run Python_PVR_Flink PreCommit", + "Run Python_Runners PreCommit", + "Run Python_Transforms PreCommit", "Run RAT PreCommit", "Run Release Gradle Build", "Run SQL PostCommit", @@ -116,6 +153,7 @@ "Run SQL_Java17 PreCommit", "Run Samza ValidatesRunner", "Run Spark Runner Nexmark Tests", + "Run Spark Runner Tpcds Tests", "Run Spark StructuredStreaming ValidatesRunner", "Run Spark ValidatesRunner Java 11", "Run Spark ValidatesRunner", @@ -126,6 +164,7 @@ "Run Whitespace PreCommit", "Run XVR_Direct PostCommit", "Run XVR_Flink PostCommit", + "Run XVR_GoUsingJava_Dataflow PostCommit", "Run XVR_JavaUsingPython_Dataflow PostCommit", "Run XVR_PythonUsingJavaSQL_Dataflow PostCommit", "Run XVR_PythonUsingJava_Dataflow PostCommit", diff --git a/runners/core-construction-java/build.gradle b/runners/core-construction-java/build.gradle index 8e91e156ae8b..355533fa9f65 100644 --- a/runners/core-construction-java/build.gradle +++ b/runners/core-construction-java/build.gradle @@ -53,6 +53,7 @@ dependencies { implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(path: ":model:job-management", configuration: "shadow") implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(path: ":sdks:java:extensions:avro") implementation project(path: ":sdks:java:fn-execution") implementation library.java.vendored_grpc_1_48_1 implementation library.java.vendored_guava_26_0_jre @@ -73,5 +74,6 @@ dependencies { 
testImplementation library.java.jackson_dataformat_yaml testImplementation project(path: ":model:fn-execution", configuration: "shadow") testImplementation project(path: ":sdks:java:core", configuration: "testRuntimeMigration") + testImplementation project(path: ":sdks:java:extensions:avro") testRuntimeOnly library.java.slf4j_jdk14 } diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderRegistrar.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderRegistrar.java index 1ccd57935b6e..1797718d6ed1 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderRegistrar.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderRegistrar.java @@ -19,8 +19,8 @@ import com.google.auto.service.AutoService; import java.util.Map; -import org.apache.beam.sdk.coders.AvroGenericCoder; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroGenericCoder; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; /** Coder registrar for AvroGenericCoder. */ diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderTranslator.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderTranslator.java index 53a566c1b52c..614810abbdbc 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderTranslator.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/AvroGenericCoderTranslator.java @@ -21,8 +21,8 @@ import java.util.List; import org.apache.avro.Schema; import org.apache.beam.runners.core.construction.CoderTranslation.TranslationContext; -import org.apache.beam.sdk.coders.AvroGenericCoder; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroGenericCoder; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Charsets; /** Coder translator for AvroGenericCoder. 
*/ diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java index c59ca23fd77c..646e9bc744ca 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java @@ -423,6 +423,7 @@ public static Set getJavaCapabilities() { capabilities.add("beam:version:sdk_base:" + JAVA_SDK_HARNESS_CONTAINER_URL); capabilities.add(BeamUrns.getUrn(SplittableParDoComponents.TRUNCATE_SIZED_RESTRICTION)); capabilities.add(BeamUrns.getUrn(Primitives.TO_STRING)); + capabilities.add(BeamUrns.getUrn(StandardProtocols.Enum.DATA_SAMPLING)); return capabilities.build(); } diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java index 5c28b27148f6..b68887350d49 100644 --- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java +++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/CoderTranslationTest.java @@ -33,7 +33,6 @@ import org.apache.beam.model.pipeline.v1.RunnerApi.Components; import org.apache.beam.runners.core.construction.CoderTranslation.TranslationContext; import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.BooleanCoder; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; @@ -48,6 +47,7 @@ import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.coders.TimestampPrefixingWindowCoder; import org.apache.beam.sdk.coders.VarLongCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.Field; import org.apache.beam.sdk.schemas.Schema.FieldType; diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java index 17c85323ad16..9a3ed7db6bc7 100644 --- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java +++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java @@ -204,6 +204,9 @@ public void testCapabilities() { assertThat( Environments.getJavaCapabilities(), hasItem(BeamUrns.getUrn(RunnerApi.StandardProtocols.Enum.STATE_CACHING))); + assertThat( + Environments.getJavaCapabilities(), + hasItem(BeamUrns.getUrn(RunnerApi.StandardProtocols.Enum.DATA_SAMPLING))); // Check that SDF truncation is supported assertThat( Environments.getJavaCapabilities(), diff --git a/runners/direct-java/build.gradle b/runners/direct-java/build.gradle index 36ada3cc888a..32c1a2304ef1 100644 --- a/runners/direct-java/build.gradle +++ b/runners/direct-java/build.gradle @@ -26,7 +26,8 @@ def dependOnProjects = [":runners:core-construction-java", ":runners:core-java", ":runners:local-java", ":runners:java-fn-execution", - ":sdks:java:fn-execution" + ":sdks:java:fn-execution", + ":sdks:java:extensions:avro" ] applyJavaNature( @@ -96,6 
+97,7 @@ dependencies { permitUnusedDeclared library.java.vendored_grpc_1_48_1 permitUnusedDeclared project(":runners:java-fn-execution") permitUnusedDeclared project(":sdks:java:fn-execution") + permitUnusedDeclared project(":sdks:java:extensions:avro") examplesJavaIntegrationTest project(project.path) examplesJavaIntegrationTest project(":examples:java") examplesJavaIntegrationTest project(path: ":examples:java", configuration: "testRuntimeMigration") diff --git a/runners/flink/1.16/build.gradle b/runners/flink/1.16/build.gradle new file mode 100644 index 000000000000..772e190aa457 --- /dev/null +++ b/runners/flink/1.16/build.gradle @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +def basePath = '..' + +/* All properties required for loading the Flink build script */ +project.ext { + // Set the version of all Flink-related dependencies here. + flink_version = '1.16.0' + // Version specific code overrides. + main_source_overrides = ["${basePath}/1.12/src/main/java", "${basePath}/1.13/src/main/java", "${basePath}/1.14/src/main/java", "${basePath}/1.15/src/main/java", './src/main/java'] + test_source_overrides = ["${basePath}/1.12/src/test/java", "${basePath}/1.13/src/test/java", "${basePath}/1.14/src/test/java", "${basePath}/1.15/src/test/java", './src/test/java'] + main_resources_overrides = [] + test_resources_overrides = [] + archives_base_name = 'beam-runners-flink-1.16' +} + +// Load the main build script which contains all build logic. +apply from: "$basePath/flink_runner.gradle" diff --git a/runners/flink/1.16/job-server-container/build.gradle b/runners/flink/1.16/job-server-container/build.gradle new file mode 100644 index 000000000000..afdb68a0fc91 --- /dev/null +++ b/runners/flink/1.16/job-server-container/build.gradle @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +def basePath = '../../job-server-container' + +project.ext { + resource_path = basePath +} + +// Load the main build script which contains all build logic. 
+apply from: "$basePath/flink_job_server_container.gradle" diff --git a/runners/flink/1.16/job-server/build.gradle b/runners/flink/1.16/job-server/build.gradle new file mode 100644 index 000000000000..99dc00275a0c --- /dev/null +++ b/runners/flink/1.16/job-server/build.gradle @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +def basePath = '../../job-server' + +project.ext { + // Look for the source code in the parent module + main_source_dirs = ["$basePath/src/main/java"] + test_source_dirs = ["$basePath/src/test/java"] + main_resources_dirs = ["$basePath/src/main/resources"] + test_resources_dirs = ["$basePath/src/test/resources"] + archives_base_name = 'beam-runners-flink-1.16-job-server' +} + +// Load the main build script which contains all build logic. +apply from: "$basePath/flink_job_server.gradle" diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingTest.java index 5f2434d7a25c..2921065c1547 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingTest.java @@ -25,6 +25,7 @@ import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner; import org.apache.flink.test.util.AbstractTestBase; +import org.apache.flink.test.util.TestBaseUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -51,7 +52,7 @@ public void preSubmit() throws Exception { @After public void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultDir); + TestBaseUtils.compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultDir); } @Test diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceTest.java index 96d45ddcf1bb..2974780f049b 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceTest.java @@ -27,6 +27,7 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner; import org.apache.flink.test.util.JavaProgramTestBase; +import org.apache.flink.test.util.TestBaseUtils; /** Reads from a bounded source in batch execution. 
*/ public class ReadSourceTest extends JavaProgramTestBase { @@ -52,7 +53,7 @@ protected void preSubmit() throws Exception { @Override protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); + TestBaseUtils.compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); } @Override diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java index 6d0890621585..e3f3fda46469 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java @@ -35,6 +35,7 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner; import org.apache.flink.test.util.AbstractTestBase; +import org.apache.flink.test.util.TestBaseUtils; import org.joda.time.Duration; import org.joda.time.Instant; import org.junit.After; @@ -63,7 +64,7 @@ public void preSubmit() throws Exception { @After public void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultDir); + TestBaseUtils.compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultDir); } /** DoFn extracting user and timestamp. */ diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsTest.java index 63abfa5b618b..faa35ca4e0a7 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsTest.java @@ -34,6 +34,7 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner; import org.apache.flink.test.util.AbstractTestBase; +import org.apache.flink.test.util.TestBaseUtils; import org.joda.time.Duration; import org.joda.time.Instant; import org.junit.After; @@ -69,7 +70,7 @@ public void preSubmit() throws Exception { @After public void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultDir); + TestBaseUtils.compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultDir); } @Test diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index bf200310398e..f7f4c32364cf 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -81,6 +81,7 @@ dependencies { implementation library.java.vendored_guava_26_0_jre implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:extensions:google-cloud-platform-core") implementation project(":sdks:java:io:kafka") implementation project(":sdks:java:io:google-cloud-platform") @@ -115,6 +116,7 @@ dependencies { testImplementation library.java.junit testImplementation project(path: ":sdks:java:io:google-cloud-platform", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro") testImplementation project(path: ":sdks:java:extensions:google-cloud-platform-core", configuration: "testRuntimeMigration") testImplementation project(path: ":runners:core-construction-java", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:extensions:python", configuration: "testRuntimeMigration") diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/AvroCoderCloudObjectTranslator.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/AvroCoderCloudObjectTranslator.java index af657761a34b..cc156aa42ab3 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/AvroCoderCloudObjectTranslator.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/AvroCoderCloudObjectTranslator.java @@ -19,7 +19,7 @@ import org.apache.avro.Schema; import org.apache.beam.runners.core.construction.SdkComponents; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** A {@link CloudObjectTranslator} for {@link AvroCoder}. */ @SuppressWarnings({ diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/CloudObjectsTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/CloudObjectsTest.java index 71208f60b205..5a1af2bf3919 100644 --- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/CloudObjectsTest.java +++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/util/CloudObjectsTest.java @@ -36,7 +36,6 @@ import java.util.Set; import org.apache.avro.generic.GenericRecord; import org.apache.beam.runners.core.construction.SdkComponents; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; @@ -54,6 +53,7 @@ import org.apache.beam.sdk.coders.StructuredCoder; import org.apache.beam.sdk.coders.TimestampPrefixingWindowCoder; import org.apache.beam.sdk.coders.VarLongCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.SchemaCoder; diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle index 08769321f1fd..acb846ab3896 100644 --- a/runners/google-cloud-dataflow-java/worker/build.gradle +++ b/runners/google-cloud-dataflow-java/worker/build.gradle @@ -52,6 +52,7 @@ def sdk_provided_shaded_project_dependencies = [ ] def sdk_provided_project_dependencies = [ ":runners:google-cloud-dataflow-java", + ":sdks:java:extensions:avro", ":sdks:java:extensions:google-cloud-platform-core", ":sdks:java:io:google-cloud-platform", ] diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/AvroByteReader.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/AvroByteReader.java index 63e6f904c991..4ff7cc6c2bce 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/AvroByteReader.java +++ 
b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/AvroByteReader.java @@ -29,8 +29,8 @@ import org.apache.avro.Schema; import org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; -import org.apache.beam.sdk.io.AvroSource.AvroReader; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource.AvroReader; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.io.OffsetBasedSource; diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateReader.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateReader.java index e5d56d665cea..45708d133701 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateReader.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateReader.java @@ -985,12 +985,17 @@ protected ResultT computeNext() { throw new RuntimeException("Unable to read value from state", e); } currentPage = valuesAndContPosition.values.iterator(); - nextPagePos = + StateTag.Builder nextPageBuilder = StateTag.of( - nextPagePos.getKind(), - nextPagePos.getTag(), - nextPagePos.getStateFamily(), - valuesAndContPosition.continuationPosition); + nextPagePos.getKind(), + nextPagePos.getTag(), + nextPagePos.getStateFamily(), + valuesAndContPosition.continuationPosition) + .toBuilder(); + if (secondPagePos.getSortedListRange() != null) { + nextPageBuilder.setSortedListRange(secondPagePos.getSortedListRange()); + } + nextPagePos = nextPageBuilder.build(); pendingNextPage = // NOTE: The results of continuation page reads are never cached. 
reader.continuationFuture(nextPagePos, coder); diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateReaderTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateReaderTest.java index 54d42545a961..ef3f470ed2a7 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateReaderTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateReaderTest.java @@ -399,7 +399,8 @@ public void testReadSortedListWithContinuations() throws Exception { .addFetchRanges(SortedListRange.newBuilder().setStart(beginning).setLimit(end)) .setFetchMaxBytes(WindmillStateReader.MAX_ORDERED_LIST_BYTES)); - final ByteString CONT = ByteString.copyFrom("CONTINUATION", Charsets.UTF_8); + final ByteString CONT_1 = ByteString.copyFrom("CONTINUATION_1", Charsets.UTF_8); + final ByteString CONT_2 = ByteString.copyFrom("CONTINUATION_2", Charsets.UTF_8); Windmill.KeyedGetDataResponse.Builder response1 = Windmill.KeyedGetDataResponse.newBuilder() .setKey(DATA_KEY) @@ -408,8 +409,8 @@ public void testReadSortedListWithContinuations() throws Exception { .setTag(STATE_KEY_1) .setStateFamily(STATE_FAMILY) .addEntries( - SortedListEntry.newBuilder().setValue(intData(5)).setSortKey(5000).setId(5)) - .setContinuationPosition(CONT) + SortedListEntry.newBuilder().setValue(intData(1)).setSortKey(1000).setId(1)) + .setContinuationPosition(CONT_1) .addFetchRanges( SortedListRange.newBuilder().setStart(beginning).setLimit(end))); @@ -424,7 +425,7 @@ public void testReadSortedListWithContinuations() throws Exception { .setTag(STATE_KEY_1) .setStateFamily(STATE_FAMILY) .addFetchRanges(SortedListRange.newBuilder().setStart(beginning).setLimit(end)) - .setRequestPosition(CONT) + .setRequestPosition(CONT_1) .setFetchMaxBytes(WindmillStateReader.MAX_ORDERED_LIST_BYTES)); Windmill.KeyedGetDataResponse.Builder response2 = @@ -435,18 +436,51 @@ public void testReadSortedListWithContinuations() throws Exception { .setTag(STATE_KEY_1) .setStateFamily(STATE_FAMILY) .addEntries( - SortedListEntry.newBuilder().setValue(intData(6)).setSortKey(6000).setId(5)) + SortedListEntry.newBuilder().setValue(intData(2)).setSortKey(2000).setId(2)) .addEntries( - SortedListEntry.newBuilder().setValue(intData(7)).setSortKey(7000).setId(7)) + SortedListEntry.newBuilder().setValue(intData(3)).setSortKey(3000).setId(3)) .addEntries( - SortedListEntry.newBuilder().setValue(intData(8)).setSortKey(8000).setId(8)) + SortedListEntry.newBuilder().setValue(intData(4)).setSortKey(4000).setId(4)) + .setContinuationPosition(CONT_2) + .addFetchRanges(SortedListRange.newBuilder().setStart(beginning).setLimit(end)) + .setRequestPosition(CONT_1)); + + Windmill.KeyedGetDataRequest.Builder expectedRequest3 = + Windmill.KeyedGetDataRequest.newBuilder() + .setKey(DATA_KEY) + .setShardingKey(SHARDING_KEY) + .setWorkToken(WORK_TOKEN) + .setMaxBytes(WindmillStateReader.MAX_KEY_BYTES) + .addSortedListsToFetch( + Windmill.TagSortedListFetchRequest.newBuilder() + .setTag(STATE_KEY_1) + .setStateFamily(STATE_FAMILY) .addFetchRanges(SortedListRange.newBuilder().setStart(beginning).setLimit(end)) - .setRequestPosition(CONT)); + .setRequestPosition(CONT_2) + .setFetchMaxBytes(WindmillStateReader.MAX_ORDERED_LIST_BYTES)); + + Windmill.KeyedGetDataResponse.Builder response3 = + Windmill.KeyedGetDataResponse.newBuilder() + .setKey(DATA_KEY) + 
.addTagSortedLists( + Windmill.TagSortedListFetchResponse.newBuilder() + .setTag(STATE_KEY_1) + .setStateFamily(STATE_FAMILY) + .addEntries( + SortedListEntry.newBuilder().setValue(intData(5)).setSortKey(5000).setId(5)) + .addEntries( + SortedListEntry.newBuilder().setValue(intData(6)).setSortKey(6000).setId(7)) + .addEntries( + SortedListEntry.newBuilder().setValue(intData(7)).setSortKey(7000).setId(7)) + .addFetchRanges(SortedListRange.newBuilder().setStart(beginning).setLimit(end)) + .setRequestPosition(CONT_2)); Mockito.when(mockWindmill.getStateData(COMPUTATION, expectedRequest1.build())) .thenReturn(response1.build()); Mockito.when(mockWindmill.getStateData(COMPUTATION, expectedRequest2.build())) .thenReturn(response2.build()); + Mockito.when(mockWindmill.getStateData(COMPUTATION, expectedRequest3.build())) + .thenReturn(response3.build()); Iterable> results = future.get(); Mockito.verify(mockWindmill).getStateData(COMPUTATION, expectedRequest1.build()); @@ -454,15 +488,19 @@ public void testReadSortedListWithContinuations() throws Exception { // Iterate over the results to force loading all the pages. } Mockito.verify(mockWindmill).getStateData(COMPUTATION, expectedRequest2.build()); + Mockito.verify(mockWindmill).getStateData(COMPUTATION, expectedRequest3.build()); Mockito.verifyNoMoreInteractions(mockWindmill); assertThat( results, Matchers.contains( + TimestampedValue.of(1, Instant.ofEpochMilli(1)), + TimestampedValue.of(2, Instant.ofEpochMilli(2)), + TimestampedValue.of(3, Instant.ofEpochMilli(3)), + TimestampedValue.of(4, Instant.ofEpochMilli(4)), TimestampedValue.of(5, Instant.ofEpochMilli(5)), TimestampedValue.of(6, Instant.ofEpochMilli(6)), - TimestampedValue.of(7, Instant.ofEpochMilli(7)), - TimestampedValue.of(8, Instant.ofEpochMilli(8)))); + TimestampedValue.of(7, Instant.ofEpochMilli(7)))); // NOTE: The future will still contain a reference to the underlying reader. 
} diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java index 16eb1131c007..34d42c76031f 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java @@ -33,8 +33,6 @@ import org.apache.beam.runners.core.construction.resources.PipelineResources; import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; import org.apache.beam.runners.spark.structuredstreaming.translation.batch.functions.SideInputValues; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.coders.AvroGenericCoder; import org.apache.beam.sdk.coders.BigDecimalCoder; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.BigEndianLongCoder; @@ -227,9 +225,11 @@ public void registerClasses(Kryo kryo) { kryo.register(SideInputValues.ByWindow.class); kryo.register(SideInputValues.Global.class); + // avro coders + tryToRegister(kryo, "org.apache.beam.sdk.extensions.avro.coders.AvroCoder"); + tryToRegister(kryo, "org.apache.beam.sdk.extensions.avro.coders.AvroGenericCoder"); + // standard coders of org.apache.beam.sdk.coders - kryo.register(AvroCoder.class); - kryo.register(AvroGenericCoder.class); kryo.register(BigDecimalCoder.class); kryo.register(BigEndianIntegerCoder.class); kryo.register(BigEndianLongCoder.class); @@ -283,5 +283,13 @@ public void registerClasses(Kryo kryo) { kryo.register(TupleTag.class); kryo.register(TupleTagList.class); } + + private void tryToRegister(Kryo kryo, String className) { + try { + kryo.register(Class.forName(className)); + } catch (ClassNotFoundException e) { + LOG.info("Class {}} was not found on classpath", className); + } + } } } diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle index 7d820c28f83f..319344634de7 100644 --- a/runners/spark/spark_runner.gradle +++ b/runners/spark/spark_runner.gradle @@ -161,6 +161,7 @@ dependencies { implementation project(":runners:java-fn-execution") implementation project(":runners:java-job-service") implementation project(":sdks:java:extensions:google-cloud-platform-core") + implementation project(":sdks:java:extensions:avro") implementation library.java.jackson_annotations implementation library.java.slf4j_api implementation library.java.joda_time @@ -191,6 +192,7 @@ dependencies { testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") // SparkStateInternalsTest extends abstract StateInternalsTest testImplementation project(path: ":runners:core-java", configuration: "testRuntimeMigration") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(":sdks:java:harness") testImplementation library.java.avro testImplementation "org.apache.kafka:kafka_$spark_scala_version:2.4.1" diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java index fa49a9f5d12b..b46f5fafd42e 100644 --- a/runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java +++ 
b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java @@ -30,7 +30,7 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.io.AvroIO; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; diff --git a/sdks/go.mod b/sdks/go.mod index ec8a5852788a..f21b851f728a 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -20,7 +20,7 @@ // directory. module github.com/apache/beam/sdks/v2 -go 1.18 +go 1.19 require ( cloud.google.com/go/bigquery v1.45.0 @@ -30,11 +30,11 @@ require ( cloud.google.com/go/pubsub v1.28.0 cloud.google.com/go/spanner v1.43.0 cloud.google.com/go/storage v1.29.0 - github.com/aws/aws-sdk-go-v2 v1.17.3 - github.com/aws/aws-sdk-go-v2/config v1.18.11 - github.com/aws/aws-sdk-go-v2/credentials v1.13.11 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.50 - github.com/aws/aws-sdk-go-v2/service/s3 v1.30.1 + github.com/aws/aws-sdk-go-v2 v1.17.5 + github.com/aws/aws-sdk-go-v2/config v1.18.15 + github.com/aws/aws-sdk-go-v2/credentials v1.13.15 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.55 + github.com/aws/aws-sdk-go-v2/service/s3 v1.30.5 github.com/aws/smithy-go v1.13.5 github.com/docker/go-connections v0.4.0 github.com/dustin/go-humanize v1.0.1 @@ -47,20 +47,20 @@ require ( github.com/linkedin/goavro v2.1.0+incompatible github.com/proullon/ramsql v0.0.0-20211120092837-c8d0a408b939 github.com/spf13/cobra v1.6.1 - github.com/testcontainers/testcontainers-go v0.15.0 - github.com/tetratelabs/wazero v1.0.0-pre.7 + github.com/testcontainers/testcontainers-go v0.18.0 + github.com/tetratelabs/wazero v1.0.0-pre.9 github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c - go.mongodb.org/mongo-driver v1.11.1 + go.mongodb.org/mongo-driver v1.11.2 golang.org/x/exp v0.0.0-20230206171751-46f607a40771 - golang.org/x/net v0.5.0 - golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 + golang.org/x/net v0.7.0 + golang.org/x/oauth2 v0.5.0 golang.org/x/sync v0.1.0 - golang.org/x/sys v0.4.0 - golang.org/x/text v0.6.0 - google.golang.org/api v0.109.0 - google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f - google.golang.org/grpc v1.52.3 + golang.org/x/sys v0.5.0 + golang.org/x/text v0.7.0 + google.golang.org/api v0.110.0 + google.golang.org/genproto v0.0.0-20230209215440-0dfe4f8abfcc + google.golang.org/grpc v1.53.0 google.golang.org/protobuf v1.28.1 gopkg.in/retry.v1 v1.0.3 gopkg.in/yaml.v2 v2.4.0 @@ -69,64 +69,63 @@ require ( require ( cloud.google.com/go v0.107.0 // indirect - cloud.google.com/go/compute v1.14.0 // indirect + cloud.google.com/go/compute v1.18.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v0.8.0 // indirect cloud.google.com/go/longrunning v0.3.0 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/Microsoft/go-winio v0.5.2 // indirect - github.com/Microsoft/hcsshim v0.9.5 // indirect github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 // indirect github.com/apache/thrift v0.14.2 // indirect - github.com/aws/aws-sdk-go v1.33.0 // indirect + github.com/aws/aws-sdk-go v1.34.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect - 
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.21 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.27 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.21 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.3.28 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.18 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.29 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.23 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.30 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.21 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.22 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.21 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.21 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.12.0 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.0 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.18.2 // indirect - github.com/cenkalti/backoff/v4 v4.1.3 // indirect - github.com/census-instrumentation/opencensus-proto v0.3.0 // indirect - github.com/cespare/xxhash/v2 v2.1.2 // indirect - github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4 // indirect - github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1 // indirect - github.com/containerd/cgroups v1.0.4 // indirect - github.com/containerd/containerd v1.6.12 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.24 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.23 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.23 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.4 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.18.5 // indirect + github.com/cenkalti/backoff/v4 v4.2.0 // indirect + github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect + github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b // indirect + github.com/containerd/containerd v1.6.18 // indirect github.com/docker/distribution v2.8.1+incompatible // indirect - github.com/docker/docker v20.10.17+incompatible // indirect + github.com/docker/docker v23.0.0+incompatible // indirect github.com/docker/go-units v0.5.0 // indirect - github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1 // indirect - github.com/envoyproxy/protoc-gen-validate v0.1.0 // indirect + github.com/envoyproxy/go-control-plane v0.10.3 // indirect + github.com/envoyproxy/protoc-gen-validate v0.9.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20221103000818-d260c55eee4c // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.1 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect github.com/googleapis/gax-go/v2 v2.7.0 // indirect github.com/inconshreveable/mousetrap v1.0.1 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/klauspost/compress v1.13.6 // indirect - 
github.com/magiconair/properties v1.8.6 // indirect - github.com/moby/sys/mount v0.3.3 // indirect - github.com/moby/sys/mountinfo v0.6.2 // indirect - github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/magiconair/properties v1.8.7 // indirect + github.com/moby/patternmatcher v0.5.0 // indirect + github.com/moby/sys/sequential v0.5.0 // indirect + github.com/moby/term v0.0.0-20221128092401-c43b287e0e0f // indirect github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect + github.com/opencontainers/image-spec v1.1.0-rc2 // indirect github.com/opencontainers/runc v1.1.3 // indirect github.com/pierrec/lz4/v4 v4.1.8 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 // indirect - github.com/sirupsen/logrus v1.8.1 // indirect + github.com/sirupsen/logrus v1.9.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/scram v1.1.1 // indirect @@ -134,8 +133,8 @@ require ( github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect go.opencensus.io v0.24.0 // indirect golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect - golang.org/x/tools v0.2.0 // indirect + golang.org/x/tools v0.3.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect -) \ No newline at end of file +) diff --git a/sdks/go.sum b/sdks/go.sum index cb2dfed1ee93..3e1f3427c18c 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -1,4 +1,3 @@ -bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= @@ -10,18 +9,24 @@ cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6T cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= +cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= +cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= +cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.107.0 h1:qkj22L7bgkl6vIeZDlOY2po43Mx/TIa2Wsa7VR+PEww= cloud.google.com/go v0.107.0/go.mod h1:wpc2eNrD7hXUTy8EKS10jkxpZBjASrORK7goS+3YX2I= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= +cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= +cloud.google.com/go/bigquery v1.7.0/go.mod 
h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= +cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= cloud.google.com/go/bigquery v1.45.0 h1:DdniQAaoQU7A/L9l6UrSBX/e0BUS2vmwC9Ll/LUQbUY= cloud.google.com/go/bigquery v1.45.0/go.mod h1:frTreZmdFlTornn7K+IsIBrvCqQP0XccOvUjEker3AM= cloud.google.com/go/bigtable v1.18.1 h1:SxQk9Bj6OKxeiuvevG/KBjqGn/7X8heZbWfK0tYkFd8= cloud.google.com/go/bigtable v1.18.1/go.mod h1:NAVyfJot9jlo+KmgWLUJ5DJGwNDoChzAcrecLpmuAmY= -cloud.google.com/go/compute v1.14.0 h1:hfm2+FfxVmnRlh6LpB7cg1ZNU+5edAHmW679JePztk0= -cloud.google.com/go/compute v1.14.0/go.mod h1:YfLtxrj9sU4Yxv+sXzZkyPjEyPBZfXHUvjxega5vAdo= +cloud.google.com/go/compute v1.18.0 h1:FEigFqoDbys2cvFkZ9Fjq4gnHBP55anJ0yQyau2f9oY= +cloud.google.com/go/compute v1.18.0/go.mod h1:1X7yHxec2Ga+Ss6jPyjxRxpu2uu7PLgsOVXvgU0yacs= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datacatalog v1.8.1 h1:8R4W1f3YINUhK/QldgGLH8L4mu4/bsOIz5eeyD+eH1w= @@ -39,6 +44,7 @@ cloud.google.com/go/profiler v0.3.1/go.mod h1:GsG14VnmcMFQ9b+kq71wh3EKMZr3WRMgLz cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= +cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= cloud.google.com/go/pubsub v1.28.0 h1:XzabfdPx/+eNrsVVGLFgeUnQQKPGkMb8klRCeYK52is= cloud.google.com/go/pubsub v1.28.0/go.mod h1:vuXFpwaVoIPQMGXqRyUQigu/AX1S3IWugR9xznmcXX8= cloud.google.com/go/spanner v1.43.0 h1:/PPZZuBnXEM1wgCtS2kfkAwN4OhU8W/A1/z0DiXrjm4= @@ -46,399 +52,172 @@ cloud.google.com/go/spanner v1.43.0/go.mod h1:G8XIgYdOK+Fbcpbs7p2fiprDw4CaZX63wh cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= +cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= +cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= cloud.google.com/go/storage v1.29.0 h1:6weCgzRvMg7lzuUurI4697AqIRPU1SvzHhynwpW31jI= cloud.google.com/go/storage v1.29.0/go.mod h1:4puEjyTKnku6gfKoTfNOU/W+a9JyuVNxjpS5GBrB8h4= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= -github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/azure-storage-blob-go v0.14.0/go.mod h1:SMqIBi+SuiQH32bvyjngEewEeXoPfKMgWlBDaYf6fck= -github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= -github.com/Azure/go-autorest v10.8.1+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod 
h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= -github.com/Azure/go-autorest/autorest v0.11.1/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= -github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= -github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A= github.com/Azure/go-autorest/autorest/adal v0.9.13/go.mod h1:W/MM4U6nLxnIskrw4UwWzlHfGjwUS50aOsc/I3yuU8M= github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= -github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= -github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= -github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= -github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= -github.com/Microsoft/go-winio v0.4.16-0.20201130162521-d1ffc52c7331/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0= -github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0= -github.com/Microsoft/go-winio v0.4.17-0.20210211115548-6eac466e5fa3/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/go-winio v0.4.17-0.20210324224401-5516f17a5958/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/go-winio v0.4.17/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.5.2 h1:a9IhgEQBCUEk6QCdml9CiJGhAws+YwffDHEMp1VMrpA= github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= -github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg= -github.com/Microsoft/hcsshim v0.8.7-0.20190325164909-8abdbb8205e4/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg= -github.com/Microsoft/hcsshim v0.8.7/go.mod h1:OHd7sQqRFrYd3RmSgbgji+ctCwkbq2wbEYNSzOYtcBQ= -github.com/Microsoft/hcsshim v0.8.9/go.mod h1:5692vkUqntj1idxauYlpoINNKeqCiG6Sg38RRsjT5y8= -github.com/Microsoft/hcsshim v0.8.14/go.mod h1:NtVKoYxQuTLx6gEq0L96c9Ju4JbRJ4nY2ow3VK6a9Lg= -github.com/Microsoft/hcsshim v0.8.15/go.mod h1:x38A4YbHbdxJtc0sF6oIz+RG0npwSCAvn69iY6URG00= -github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600= -github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4= -github.com/Microsoft/hcsshim v0.9.5 h1:AbV+VPfTrIVffukazHcpxmz/sRiE6YaMDzHWR9BXZHo= -github.com/Microsoft/hcsshim v0.9.5/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= -github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU= -github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY= 
-github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= +github.com/Microsoft/hcsshim v0.9.6 h1:VwnDOgLeoi2du6dAznfmspNqTiwczvjv4K7NxuY9jsY= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alexflint/go-filemutex v0.0.0-20171022225611-72bdc8eae2ae/go.mod h1:CgnQgUtFrFz9mxFNtED3jI5tLDjKlOM+oUF/sTk6ps0= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0= github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.14.2 h1:hY4rAyg7Eqbb27GB6gkhUKrRAuc8xRjlNtJq+LseKeY= github.com/apache/thrift v0.14.2/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= -github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0= github.com/aws/aws-sdk-go v1.17.4/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= -github.com/aws/aws-sdk-go v1.33.0 h1:Bq5Y6VTLbfnJp1IV8EL/qUU5qO1DYHda/zis/sqevkY= -github.com/aws/aws-sdk-go v1.33.0/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= +github.com/aws/aws-sdk-go v1.34.0 h1:brux2dRrlwCF5JhTL7MUT3WUwo9zfDHZZp3+g3Mvlmo= +github.com/aws/aws-sdk-go v1.34.0/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go-v2 v1.7.1/go.mod h1:L5LuPC1ZgDr2xQS7AmIec/Jlc7O/Y1u2KxJyNVab250= -github.com/aws/aws-sdk-go-v2 v1.17.3 h1:shN7NlnVzvDUgPQ+1rLMSxY8OWRNDRYtiqe0p/PgrhY= -github.com/aws/aws-sdk-go-v2 v1.17.3/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2 v1.17.5 h1:TzCUW1Nq4H8Xscph5M/skINUitxM5UBAyvm2s7XBzL4= +github.com/aws/aws-sdk-go-v2 v1.17.5/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 h1:dK82zF6kkPeCo8J1e+tGx4JdvDIQzj7ygIoLg8WMuGs= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream 
v1.4.10/go.mod h1:VeTZetY5KRJLuD/7fkQXMU6Mw7H5m/KP2J5Iy9osMno= github.com/aws/aws-sdk-go-v2/config v1.5.0/go.mod h1:RWlPOAW3E3tbtNAqTwvSW54Of/yP3oiZXMI0xfUdjyA= -github.com/aws/aws-sdk-go-v2/config v1.18.11 h1:7dJD4p90OyKYIihuwe/LbHfP7uw4yVm5P1hel+b8UZ8= -github.com/aws/aws-sdk-go-v2/config v1.18.11/go.mod h1:FTGKr2F7QL7IAg22dUmEB5NWpLPAOuhrONzXe7TVhAI= +github.com/aws/aws-sdk-go-v2/config v1.18.15 h1:509yMO0pJUGUugBP2H9FOFyV+7Mz7sRR+snfDN5W4NY= +github.com/aws/aws-sdk-go-v2/config v1.18.15/go.mod h1:vS0tddZqpE8cD9CyW0/kITHF5Bq2QasW9Y1DFHD//O0= github.com/aws/aws-sdk-go-v2/credentials v1.3.1/go.mod h1:r0n73xwsIVagq8RsxmZbGSRQFj9As3je72C2WzUIToc= -github.com/aws/aws-sdk-go-v2/credentials v1.13.11 h1:QnvlTut1XXKkX4aaM1Ydo5X0CHriv0jmLu8PTVQQJJo= -github.com/aws/aws-sdk-go-v2/credentials v1.13.11/go.mod h1:tqAm4JmQaShel+Qi38hmd1QglSnnxaYt50k/9yGQzzc= +github.com/aws/aws-sdk-go-v2/credentials v1.13.15 h1:0rZQIi6deJFjOEgHI9HI2eZcLPPEGQPictX66oRFLL8= +github.com/aws/aws-sdk-go-v2/credentials v1.13.15/go.mod h1:vRMLMD3/rXU+o6j2MW5YefrGMBmdTvkLLGqFwMLBHQc= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.3.0/go.mod h1:2LAuqPx1I6jNfaGDucWfA2zqQCYCOMCDHiCOciALyNw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.21 h1:j9wi1kQ8b+e0FBVHxCqCGo4kxDU175hoDHcWAi0sauU= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.21/go.mod h1:ugwW57Z5Z48bpvUyZuaPy4Kv+vEfJWnIrky7RmkBvJg= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23 h1:Kbiv9PGnQfG/imNI4L/heyUXvzKmcWSBeDvkrQz5pFc= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.23/go.mod h1:mOtmAg65GT1HIL/HT/PynwPbS+UG0BgCZ6vhkPqnxWo= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.3.2/go.mod h1:qaqQiHSrOUVOfKe6fhgQ6UzhxjwqVW8aHNegd6Ws4w4= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.50 h1:ATgzvd5DaU0Evx7yvaUw2ftwiWDGnDN59zowPF3jDk0= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.50/go.mod h1:naA7bah2/dpvwlyWysZ7yaAYI1Ti73HPaDyGryfJuiU= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.27 h1:I3cakv2Uy1vNmmhRQmFptYDxOvBnwCdNwyw63N0RaRU= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.27/go.mod h1:a1/UpzeyBBerajpnP5nGZa9mGzsBn5cOKxm6NWQsvoI= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.21 h1:5NbbMrIzmUn/TXFqAle6mgrH5m9cOvMLRGL7pnG8tRE= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.21/go.mod h1:+Gxn8jYn5k9ebfHEqlhrMirFjSW0v0C9fI+KN5vk2kE= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.55 h1:ClZKHmu2QIRQCEQ2Y2upfu4JPO0pG69Ce5eiq3PS2V4= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.55/go.mod h1:L/h5B6I7reig2QJXCGY0e0NVx4hYCcjETmsfR02hFng= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.29 h1:9/aKwwus0TQxppPXFmf010DFrE+ssSbzroLVYINA+xE= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.29/go.mod h1:Dip3sIGv485+xerzVv24emnjX5Sg88utCL8fwGmCeWg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.23 h1:b/Vn141DBuLVgXbhRWIrl9g+ww7G+ScV5SzniWR13jQ= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.23/go.mod h1:mr6c4cHC+S/MMkrjtSlG4QA36kOznDep+0fga5L/fGQ= github.com/aws/aws-sdk-go-v2/internal/ini v1.1.1/go.mod h1:Zy8smImhTdOETZqfyn01iNOe0CNggVbPjCajyaz6Gvg= -github.com/aws/aws-sdk-go-v2/internal/ini v1.3.28 h1:KeTxcGdNnQudb46oOl4d90f2I33DF/c6q3RnZAmvQdQ= -github.com/aws/aws-sdk-go-v2/internal/ini v1.3.28/go.mod h1:yRZVr/iT0AqyHeep00SZ4YfBAKojXz08w3XMBscdi0c= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.18 h1:H/mF2LNWwX00lD6FlYfKpLLZgUW7oIzCBkig78x4Xok= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.18/go.mod 
h1:T2Ku+STrYQ1zIkL1wMvj8P3wWQaaCMKNdz70MT2FLfE= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.30 h1:IVx9L7YFhpPq0tTnGo8u8TpluFu7nAn9X3sUDMb11c0= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.30/go.mod h1:vsbq62AOBwQ1LJ/GWKFxX8beUEYeRp/Agitrxee2/qM= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.21 h1:QdxdY43AiwsqG/VAqHA7bIVSm3rKr8/p9i05ydA0/RM= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.21/go.mod h1:QtIEat7ksHH8nFItljyvMI0dGj8lipK2XZ4PhNihTEU= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.2.1/go.mod h1:v33JQ57i2nekYTA70Mb+O18KeH4KqhdqxTJZNK1zdRE= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 h1:y2+VQzC6Zh2ojtV2LoC0MNwHWc6qXv/j2vrQtlftkdA= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11/go.mod h1:iV4q2hsqtNECrfmlXyord9u4zyuFEJX9eLgLpSPzWA8= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.22 h1:kv5vRAl00tozRxSnI0IszPWGXsJOyA7hmEUHFYqsyvw= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.22/go.mod h1:Od+GU5+Yx41gryN/ZGZzAJMZ9R1yn6lgA0fD5Lo5SkQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.24 h1:Qmm8klpAdkuN3/rPrIMa/hZQ1z93WMBPjOzdAsbSnlo= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.24/go.mod h1:QelGeWBVRh9PbbXsfXKTFlU9FjT6W2yP+dW5jMQzOkg= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.2.1/go.mod h1:zceowr5Z1Nh2WVP8bf/3ikB41IZW59E4yIYbg+pC6mw= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.21 h1:5C6XgTViSb0bunmU57b3CT+MhxULqHH2721FVA+/kDM= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.21/go.mod h1:lRToEJsn+DRA9lW4O9L9+/3hjTkUzlzyzHqn8MTds5k= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.23 h1:QoOybhwRfciWUBbZ0gp9S7XaDnCuSTeK/fySB99V1ls= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.23/go.mod h1:9uPh+Hrz2Vn6oMnQYiUi/zbh3ovbnQk19YKINkQny44= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.5.1/go.mod h1:6EQZIwNNvHpq/2/QSJnp4+ECvqIy55w95Ofs0ze+nGQ= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.21 h1:vY5siRXvW5TrOKm2qKEf9tliBfdLxdfy0i02LOcmqUo= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.21/go.mod h1:WZvNXT1XuH8dnJM0HvOlvk+RNn7NbAPvA/ACO0QarSc= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.23 h1:qc+RW0WWZ2KApMnsu/EVCPqLTyIH55uc7YQq7mq4XqE= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.23/go.mod h1:FJhZWVWBCcgAF8jbep7pxQ1QUsjzTwa9tvEXGw2TDRo= github.com/aws/aws-sdk-go-v2/service/s3 v1.11.1/go.mod h1:XLAGFrEjbvMCLvAtWLLP32yTv8GpBquCApZEycDLunI= -github.com/aws/aws-sdk-go-v2/service/s3 v1.30.1 h1:kIgvVY7PHx4gIb0na/Q9gTWJWauTwhKdaqJjX8PkIY8= -github.com/aws/aws-sdk-go-v2/service/s3 v1.30.1/go.mod h1:L2l2/q76teehcW7YEsgsDjqdsDTERJeX3nOMIFlgGUE= +github.com/aws/aws-sdk-go-v2/service/s3 v1.30.5 h1:kFfb+NMap4R7nDvBYyABa/nw7KFMtAfygD1Hyoxh4uE= +github.com/aws/aws-sdk-go-v2/service/s3 v1.30.5/go.mod h1:Dze3kNt4T+Dgb8YCfuIFSBLmE6hadKNxqfdF0Xmqz1I= github.com/aws/aws-sdk-go-v2/service/sso v1.3.1/go.mod h1:J3A3RGUvuCZjvSuZEcOpHDnzZP/sKbhDWV2T1EOzFIM= -github.com/aws/aws-sdk-go-v2/service/sso v1.12.0 h1:/2gzjhQowRLarkkBOGPXSRnb8sQ2RVsjdG1C/UliK/c= -github.com/aws/aws-sdk-go-v2/service/sso v1.12.0/go.mod h1:wo/B7uUm/7zw/dWhBJ4FXuw1sySU5lyIhVg1Bu2yL9A= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.0 h1:Jfly6mRxk2ZOSlbCvZfKNS7TukSx1mIzhSsqZ/IGSZI= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.0/go.mod h1:TZSH7xLO7+phDtViY/KUp9WGCJMQkLJ/VpgkTFd5gh8= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.4 
h1:qJdM48OOLl1FBSzI7ZrA1ZfLwOyCYqkXV5lko1hYDBw= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.4/go.mod h1:jtLIhd+V+lft6ktxpItycqHqiVXrPIRjWIsFIlzMriw= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.4 h1:YRkWXQveFb0tFC0TLktmmhGsOcCgLwvq88MC2al47AA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.4/go.mod h1:zVwRrfdSmbRZWkUkWjOItY7SOalnFnq/Yg2LVPqDjwc= github.com/aws/aws-sdk-go-v2/service/sts v1.6.0/go.mod h1:q7o0j7d7HrJk/vr9uUt3BVRASvcU7gYZB9PUgPiByXg= -github.com/aws/aws-sdk-go-v2/service/sts v1.18.2 h1:J/4wIaGInCEYCGhTSruxCxeoA5cy91a+JT7cHFKFSHQ= -github.com/aws/aws-sdk-go-v2/service/sts v1.18.2/go.mod h1:+lGbb3+1ugwKrNTWcf2RT05Xmp543B06zDFTwiTLp7I= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.5 h1:L1600eLr0YvTT7gNh3Ni24yGI7NSHkq9Gp62vijPRCs= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.5/go.mod h1:1mKZHLLpDMHTNSYPJ7qrcnCQdHCWsNQaT0xRvq2u80s= github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= -github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= -github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= -github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= -github.com/bshuster-repo/logrus-logstash-hook v0.4.1/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk= -github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= -github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= -github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50= -github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= -github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= -github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= -github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= +github.com/cenkalti/backoff/v4 v4.2.0 h1:HN5dHm3WBOgndBH6E8V0q2jIYIR3s9yglV8k/+MN3u4= +github.com/cenkalti/backoff/v4 v4.2.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/census-instrumentation/opencensus-proto v0.3.0 h1:t/LhUZLVitR1Ow2YOnduCsavhwFUklBMoGVYUCqmCqk= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 
+github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= +github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= -github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= -github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg= -github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc= -github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= -github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4 h1:hzAQntlaYRkVSFEfj9OTWlVV1H155FMD8BTKktLv0QI= github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= +github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe h1:QQ3GSy+MqSHxm/d8nCtnAiZdYFd45cYZPs8vOOIYKfk= +github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1 h1:zH8ljVhhq7yC0MIeUL/IviMtY8hx2mK8cN9wEYb8ggw= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= +github.com/cncf/xds/go 
v0.0.0-20220314180256-7f1daf1720fc/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b h1:ACGZRIr7HsgBKHsueQ1yM4WaVaXh21ynwqsF8M8tXhA= +github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= -github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE= -github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU= -github.com/containerd/aufs v0.0.0-20210316121734-20793ff83c97/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU= -github.com/containerd/aufs v1.0.0/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU= -github.com/containerd/btrfs v0.0.0-20201111183144-404b9149801e/go.mod h1:jg2QkJcsabfHugurUvvPhS3E08Oxiuh5W/g1ybB4e0E= -github.com/containerd/btrfs v0.0.0-20210316141732-918d888fb676/go.mod h1:zMcX3qkXTAi9GI50+0HOeuV8LU2ryCE/V2vG/ZBiTss= -github.com/containerd/btrfs v1.0.0/go.mod h1:zMcX3qkXTAi9GI50+0HOeuV8LU2ryCE/V2vG/ZBiTss= -github.com/containerd/cgroups v0.0.0-20190717030353-c4b9ac5c7601/go.mod h1:X9rLEHIqSf/wfK8NsPqxJmeZgW4pcfzdXITDrUSJ6uI= -github.com/containerd/cgroups v0.0.0-20190919134610-bf292b21730f/go.mod h1:OApqhQ4XNSNC13gXIwDjhOQxjWa/NxkwZXJ1EvqT0ko= -github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59/go.mod h1:pA0z1pT8KYB3TCXK/ocprsh7MAkoW8bZVzPdih9snmM= -github.com/containerd/cgroups v0.0.0-20200710171044-318312a37340/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= -github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= -github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= -github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU= -github.com/containerd/cgroups v1.0.4 h1:jN/mbWBEaz+T1pi5OFtnkQ+8qnmEbAr1Oo1FRm5B0dA= -github.com/containerd/cgroups v1.0.4/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA= -github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= -github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= -github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= -github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= -github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ= github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= -github.com/containerd/containerd v1.2.10/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.0-beta.2.0.20190828155532-0293cbd26c69/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.0/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.1-0.20191213020239-082f7e3aed57/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.2/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.4.0-beta.2.0.20200729163537-40b22ef07410/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.4.1/go.mod 
h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.4.3/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.5.0-beta.1/go.mod h1:5HfvG1V2FsKesEGQ17k5/T7V960Tmcumvqn8Mc+pCYQ= -github.com/containerd/containerd v1.5.0-beta.3/go.mod h1:/wr9AVtEM7x9c+n0+stptlo/uBBoBORwEx6ardVcmKU= -github.com/containerd/containerd v1.5.0-beta.4/go.mod h1:GmdgZd2zA2GYIBZ0w09ZvgqEq8EfBp/m3lcVZIvPHhI= -github.com/containerd/containerd v1.5.0-rc.0/go.mod h1:V/IXoMqNGgBlabz3tHD2TWDoTJseu1FGOKuoA4nNb2s= -github.com/containerd/containerd v1.5.1/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g= -github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0QMhscqVp1AR9c= -github.com/containerd/containerd v1.6.12 h1:kJ9b3mOFKf8yqo05Ob+tMoxvt1pbVWhnB0re9Y+k+8c= -github.com/containerd/containerd v1.6.12/go.mod h1:K4Bw7gjgh4TnkmQY+py/PYQGp4e7xgnHAeg87VeWb3A= -github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20191127005431-f65d91d395eb/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20200710164510-efbc4488d8fe/go.mod h1:cECdGN1O8G9bgKTlLhuPJimka6Xb/Gg7vYzCTNVxhvo= -github.com/containerd/continuity v0.0.0-20201208142359-180525291bb7/go.mod h1:kR3BEg7bDFaEddKm54WSmrol1fKWDU1nKYkgrcgZT7Y= -github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EXlVlkqNba9rJe3j7w3Xa924itAMLgZH4UD/Q4PExuQ= -github.com/containerd/continuity v0.1.0/go.mod h1:ICJu0PwR54nI0yPEnJ6jcS+J7CZAUXrLh8lPo2knzsM= +github.com/containerd/containerd v1.6.18 h1:qZbsLvmyu+Vlty0/Ex5xc0z2YtKpIsb5n45mAMI+2Ns= +github.com/containerd/containerd v1.6.18/go.mod h1:1RdCUu95+gc2v9t3IL+zIlpClSmew7/0YS8O5eQZrOw= github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg= -github.com/containerd/fifo v0.0.0-20180307165137-3d5202aec260/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= -github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= -github.com/containerd/fifo v0.0.0-20200410184934-f15a3290365b/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0= -github.com/containerd/fifo v0.0.0-20201026212402-0724c46b320c/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0= -github.com/containerd/fifo v0.0.0-20210316144830-115abcc95a1d/go.mod h1:ocF/ME1SX5b1AOlWi9r677YJmCPSwwWnQ9O123vzpE4= -github.com/containerd/fifo v1.0.0/go.mod h1:ocF/ME1SX5b1AOlWi9r677YJmCPSwwWnQ9O123vzpE4= -github.com/containerd/go-cni v1.0.1/go.mod h1:+vUpYxKvAF72G9i1WoDOiPGRtQpqsNW/ZHtSlv++smU= -github.com/containerd/go-cni v1.0.2/go.mod h1:nrNABBHzu0ZwCug9Ije8hL2xBCYh/pjfMb1aZGrrohk= -github.com/containerd/go-runc v0.0.0-20180907222934-5a6d9f37cfa3/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0= -github.com/containerd/go-runc v0.0.0-20190911050354-e029b79d8cda/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0= -github.com/containerd/go-runc v0.0.0-20200220073739-7016d3ce2328/go.mod h1:PpyHrqVs8FTi9vpyHwPwiNEGaACDxT/N/pLcvMSRA9g= -github.com/containerd/go-runc v0.0.0-20201020171139-16b287bc67d0/go.mod h1:cNU0ZbCgCQVZK4lgG3P+9tn9/PaJNmoDXPpoJhDR+Ok= -github.com/containerd/go-runc v1.0.0/go.mod h1:cNU0ZbCgCQVZK4lgG3P+9tn9/PaJNmoDXPpoJhDR+Ok= -github.com/containerd/imgcrypt v1.0.1/go.mod 
h1:mdd8cEPW7TPgNG4FpuP3sGBiQ7Yi/zak9TYCG3juvb0= -github.com/containerd/imgcrypt v1.0.4-0.20210301171431-0ae5c75f59ba/go.mod h1:6TNsg0ctmizkrOgXRNQjAPFWpMYRWuiB6dSF4Pfa5SA= -github.com/containerd/imgcrypt v1.1.1-0.20210312161619-7ed62a527887/go.mod h1:5AZJNI6sLHJljKuI9IHnw1pWqo/F0nGDOuR9zgTs7ow= -github.com/containerd/imgcrypt v1.1.1/go.mod h1:xpLnwiQmEUJPvQoAapeb2SNCxz7Xr6PJrXQb0Dpc4ms= -github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFYfE5+So4M5syatU0N0f0LbWpuqyMi4/BE8c= -github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY= -github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY= -github.com/containerd/stargz-snapshotter/estargz v0.4.1/go.mod h1:x7Q9dg9QYb4+ELgxmo4gBUeJB0tl5dqH1Sdz0nJU1QM= -github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= -github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= -github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8= -github.com/containerd/ttrpc v1.0.1/go.mod h1:UAxOpgT9ziI0gJrmKvgcZivgxOp8iFPSk8httJEt98Y= -github.com/containerd/ttrpc v1.0.2/go.mod h1:UAxOpgT9ziI0gJrmKvgcZivgxOp8iFPSk8httJEt98Y= -github.com/containerd/ttrpc v1.1.0/go.mod h1:XX4ZTnoOId4HklF4edwc4DcqskFZuvXB1Evzy5KFQpQ= -github.com/containerd/typeurl v0.0.0-20180627222232-a93fcdb778cd/go.mod h1:Cm3kwCdlkCfMSHURc+r6fwoGH6/F1hH3S4sg0rLFWPc= -github.com/containerd/typeurl v0.0.0-20190911142611-5eb25027c9fd/go.mod h1:GeKYzf2pQcqv7tJ0AoCuuhtnqhva5LNU3U+OyKxxJpk= -github.com/containerd/typeurl v1.0.1/go.mod h1:TB1hUtrpaiO88KEK56ijojHS1+NeF0izUACaJW2mdXg= -github.com/containerd/typeurl v1.0.2/go.mod h1:9trJWW2sRlGub4wZJRTW83VtbOLS6hwcDZXTn6oPz9s= -github.com/containerd/zfs v0.0.0-20200918131355-0a33824f23a2/go.mod h1:8IgZOBdv8fAgXddBT4dBXJPtxyRsejFIpXoklgxgEjw= -github.com/containerd/zfs v0.0.0-20210301145711-11e8f1707f62/go.mod h1:A9zfAbMlQwE+/is6hi0Xw8ktpL+6glmqZYtevJgaB8Y= -github.com/containerd/zfs v0.0.0-20210315114300-dde8f0fda960/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNRIRHsFY= -github.com/containerd/zfs v0.0.0-20210324211415-d5c4544f0433/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNRIRHsFY= -github.com/containerd/zfs v1.0.0/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNRIRHsFY= -github.com/containernetworking/cni v0.7.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/cni v0.8.0/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/cni v0.8.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/plugins v0.8.6/go.mod h1:qnw5mN19D8fIwkqW7oHHYDHVlzhJpcY6TQxn/fUyDDM= -github.com/containernetworking/plugins v0.9.1/go.mod h1:xP/idU2ldlzN6m4p5LmGiwRDjeJr6FLK6vuiUwoH7P8= -github.com/containers/ocicrypt v1.0.1/go.mod h1:MeJDzk1RJHv89LjsH0Sp5KTY3ZYkjXO/C+bKAeWFIrc= -github.com/containers/ocicrypt v1.1.0/go.mod h1:b8AOe0YR67uU8OqfVNcznfFpAzu3rdgUV4GP9qXPfu4= -github.com/containers/ocicrypt v1.1.1/go.mod h1:Dm55fwWm1YZAjYRaJ94z2mfZikIyIN4B0oB3dj3jFxY= -github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= -github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= -github.com/coreos/go-iptables v0.5.0/go.mod 
h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= -github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= -github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= -github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4= +github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ= -github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s= -github.com/d2g/dhcp4server v0.0.0-20181031114812-7d4a0a7f59a5/go.mod h1:Eo87+Kg/IX2hfWJfwxMzLyuSZyxSoAug2nGa1G2QAi8= -github.com/d2g/hardwareaddr v0.0.0-20190221164911-e7d9fbe030e4/go.mod h1:bMl4RjIciD2oAxI7DmWRx6gbeqrkoLqv3MV0vzNad+I= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/denverdino/aliyungo v0.0.0-20190125010748-a747050bb1ba/go.mod h1:dV8lFg6daOBZbT6/BDGIz6Y3WFGn8juu6G+CQ6LHtl0= -github.com/dgrijalva/jwt-go v0.0.0-20170104182250-a601269ab70c/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= -github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= -github.com/docker/cli 
v0.0.0-20191017083524-a8ff7f821017/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/distribution v0.0.0-20190905152932-14b96e55d84c/go.mod h1:0+TTO4EOBfRPhZXAeF1Vu+W3hHZ8eLp8PgKVZlcvtFY= -github.com/docker/distribution v2.7.1-0.20190205005809-0d3efadf0154+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68= github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/docker v20.10.17+incompatible h1:JYCuMrWaVNophQTOrMMoSwudOVEfcegoZZrleKc1xwE= -github.com/docker/docker v20.10.17+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/docker-credential-helpers v0.6.3/go.mod h1:WRaJzqw3CTB9bk10avuGsjVBZsD05qeibJ1/TYlvc0Y= +github.com/docker/docker v23.0.0+incompatible h1:L6c28tNyqZ4/ub9AZC9d5QUuunoHHfEH4/Ue+h/E5nE= +github.com/docker/docker v23.0.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-events v0.0.0-20170721190031-9461782956ad/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= -github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= -github.com/docker/go-metrics v0.0.0-20180209012529-399ea8c73916/go.mod h1:/u0gXw0Gay3ceNrsHubL3BtdOL2fHf93USgMTe0W5dI= -github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= -github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= -github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= -github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane 
v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= -github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1 h1:xvqufLtNVwAhN8NMyWklVgxnWohi+wtMGQMhtxexlm0= -github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= -github.com/envoyproxy/protoc-gen-validate v0.1.0 h1:EQciDnbrYxy13PgWoY8AqoxGiPrpgBZ1R8UNe3ddc+A= +github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= +github.com/envoyproxy/go-control-plane v0.10.3 h1:xdCVXxEe0Y3FQith+0cj2irwZudqGYvecuLB1HtdexY= +github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= +github.com/envoyproxy/protoc-gen-validate v0.9.1 h1:PS7VIOgmSVhWUEeZwTe7z7zouA22Cr590PzXKbZHOVY= +github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/frankban/quicktest v1.2.2/go.mod h1:Qh/WofXFeiAFII1aEBu529AtJo6Zg2VHscnEsbBnJ20= github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fullsailor/pkcs7 v0.0.0-20190404230743-d7302db945fa/go.mod h1:KnogPXtdwXqoenmZCw6S+25EAm2MkxbG0deNDu4cbSA= -github.com/garyburd/redigo v0.0.0-20150301180006-535138d7bcd7/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= -github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gorp/gorp v2.0.0+incompatible h1:dIQPsBtl6/H1MjVseWuWPXa7ET4p6Dve4j3Hg+UjqYw= github.com/go-gorp/gorp v2.0.0+incompatible/go.mod h1:7IfkAQnO7jfT/9IQ3R9wL1dFhukN6aQxzKTHnkxzA/E= -github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod 
h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= -github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= -github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= -github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= -github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= -github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= -github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= -github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= -github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= -github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/godbus/dbus v0.0.0-20151105175453-c7fdd8b5cd55/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= -github.com/godbus/dbus v0.0.0-20180201030542-885f9cc04c9c/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= -github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= -github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/googleapis v1.2.0/go.mod h1:Njal3psf3qN6dwBtQfUmBZh2ybovJ0tlu3o/AC7HYjU= -github.com/gogo/googleapis v1.4.0/go.mod h1:5YRNX2z1oM5gXdAkurHa942MDgEJyk02w4OecKY87+c= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= -github.com/gogo/protobuf v1.3.0/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= -github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod 
h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -450,8 +229,8 @@ github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFU github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= -github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -485,6 +264,7 @@ github.com/google/go-cmp v0.2.1-0.20190312032427-6f77996f0c42/go.mod h1:8QqcDgzr github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -492,14 +272,13 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-containerregistry v0.5.1/go.mod h1:Ct15B4yir3PLOP5jsy0GNeYVaIZs/MK/Jz5any1wFW0= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.2.1 h1:d8MncMlErDFTwQGBK1xhv026j9kqhvw1Qv9IbWT1VLQ= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -507,95 +286,51 @@ github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20221103000818-d260c55eee4c h1:lvddKcYTQ545ADhBujtIJmqQrZBDsGo7XIMbAQe/sNY= github.com/google/pprof v0.0.0-20221103000818-d260c55eee4c/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.2.1 h1:RY7tHKZcRlk788d5WSo/e83gOyyy742E8GSs771ySpg= -github.com/googleapis/enterprise-certificate-proxy v0.2.1/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= +github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= +github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.7.0 h1:IcsPKeInNvYi7eqSaDjiZqDDKu5rsmunY0Y1YupQSSQ= github.com/googleapis/gax-go/v2 v2.7.0/go.mod h1:TEop28CZZQ2y+c0VxMUmu1lV+fQx57QpBWsYpwqHJx8= -github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/handlers v0.0.0-20150720190736-60c7bfde3e33/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= -github.com/gorilla/mux v1.7.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod 
h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/hashicorp/errwrap v0.0.0-20141028054710-7554cd9344ce/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-multierror v0.0.0-20161216184304-ed905158d874/go.mod h1:JMRHfdO9jKNzS/+BTlxCjKNQHg/jZAft8U7LloJvN7I= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= -github.com/imdario/mergo v0.3.8/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= -github.com/imdario/mergo v0.3.10/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= github.com/jcmturner/gofork v0.0.0-20180107083740-2aebee971930/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= -github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jmespath/go-jmespath v0.0.0-20160803190731-bd40a432e4c7/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joefitzgerald/rainbow-reporter v0.1.0/go.mod h1:481CNgqmVHQZzdIbN52CupLJyoVwB10FQ/IQlF1pdL8= 
github.com/johannesboyne/gofakes3 v0.0.0-20221110173912-32fb85c5aed6 h1:eQGUsj2LcsLzfrHY1noKDSU7h+c9/rw9pQPwbQ9g1jQ= github.com/johannesboyne/gofakes3 v0.0.0-20221110173912-32fb85c5aed6/go.mod h1:LIAXxPvcUXwOcTIj9LSNSUpE9/eMHalTWxsP/kmWxQI= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= -github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= @@ -604,205 +339,73 @@ github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw= github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/linkedin/goavro v2.1.0+incompatible h1:DV2aUlj2xZiuxQyvag8Dy7zjY69ENjS66bWkSfdpddY= 
github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MElg9tdeLbINjLjiM2xZFYM= -github.com/linuxkit/virtsock v0.0.0-20201010232012-f8cee7dfc7a3/go.mod h1:3r6x7q95whyfWQpmGZTu3gk3v2YkMi05HEzl7Tf7YEo= -github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/magiconair/properties v1.8.6 h1:5ibWZ6iY0NctNGWo87LalDlEZ6R41TqbbDamhfG/Qzo= -github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= -github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= -github.com/marstr/guid v1.1.0/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/lyft/protoc-gen-star v0.6.0/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= -github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= -github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2/go.mod h1:eD9eIE7cdwcMi9rYluz88Jz2VyhSmden33/aXg4oVIY= -github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= -github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/osext v0.0.0-20151018003038-5e2d6d41470f/go.mod h1:OkQIRizQZAeMln+1tSwduZz7+Af5oFlKirV/MSYes2A= -github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= -github.com/moby/sys/mount v0.3.3 h1:fX1SVkXFJ47XWDoeFW4Sq7PdQJnV2QIDZAqjNqgEjUs= -github.com/moby/sys/mount v0.3.3/go.mod h1:PBaEorSNTLG5t/+4EgukEQVlAvVEc6ZjTySwKdqp5K0= -github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= -github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= +github.com/moby/patternmatcher v0.5.0 h1:YCZgJOeULcxLw1Q+sVR636pmS7sPEn1Qo2iAN6M7DBo= +github.com/moby/patternmatcher v0.5.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= -github.com/moby/sys/mountinfo v0.6.2 
h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= -github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= -github.com/moby/sys/symlink v0.1.0/go.mod h1:GGDODQmbFOjFsXvfLVn3+ZRxkch54RkSiGqsZeMYowQ= -github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo= -github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 h1:dcztxKSvZ4Id8iPpHERQBbIJfabdt4wUm5qy3wOL2Zc= -github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6/go.mod h1:E2VnQOmVuvZB6UYnnDB0qG5Nq/1tD9acaOpo6xmt0Kw= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= +github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= +github.com/moby/term v0.0.0-20221128092401-c43b287e0e0f h1:J/7hjLaHLD7epG0m6TBMGmp4NQ+ibBYLfeyJWdAIFLA= +github.com/moby/term v0.0.0-20221128092401-c43b287e0e0f/go.mod h1:15ce4BGCFxt7I5NQKT+HV0yEDxmf6fSysfEDiVo3zFM= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/ncw/swift v1.0.47/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= github.com/ncw/swift v1.0.52/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 
-github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= -github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= -github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= -github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= -github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v1.0.0-rc1.0.20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.0.0/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 h1:rc3tiVYb5z54aKaDfakKn0dDjIyPpTtszkjuMzyt7ec= -github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0= -github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= +github.com/opencontainers/image-spec v1.1.0-rc2 h1:2zx/Stx4Wc5pIPDvIxHXvXtQFW/7XWJGmnM7r3wg034= +github.com/opencontainers/image-spec v1.1.0-rc2/go.mod h1:3OVijpioIKYWTqjiG0zfF6wvoJ4fAXGbjdZuI2NgsRQ= github.com/opencontainers/runc v1.1.3 h1:vIXrkId+0/J2Ymu2m7VjGvbSlAId9XNRPhn2p4b+d8w= github.com/opencontainers/runc v1.1.3/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= -github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= 
-github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= -github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE= -github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= -github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8= github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= -github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4= github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA= -github.com/prometheus/client_golang v0.0.0-20180209125602-c332b6f63c06/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= -github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.0.0-20180110214958-89604d197083/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= 
-github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= -github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= -github.com/prometheus/procfs v0.0.0-20180125133057-cb4147076ac7/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.0-20190522114515-bc1a522cf7b1/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= -github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/proullon/ramsql v0.0.0-20211120092837-c8d0a408b939 h1:mtMU7aT8cTAyNL3O4RyOfe/OOUxwCN525SIbKQoUvw0= github.com/proullon/ramsql v0.0.0-20211120092837-c8d0a408b939/go.mod h1:jG8oAQG0ZPHPyxg5QlMERS31airDC+ZuqiAe8DUvFVo= github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a h1:3QH7VyOaaiUHNrA9Se4YQIRkDTCw1EJls9xTUCaCeRM= github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a/go.mod h1:4r5QyqhjIWCcK8DO4KMclc5Iknq5qVBAlbYYzAbUScQ= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.8.1 h1:geMPLpDpQOgVyCg5z5GoRwLHepNdb71NXb67XFkP+Eg= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8= -github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= -github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= -github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24q7p+U= -github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shabbyrobe/gocovmerge 
v0.0.0-20180507124511-f6ea450bfb63 h1:J6qvD6rbmOil46orKqJaRPG+zTpoGlBTUdyv8ki63L0= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63/go.mod h1:n+VKSARF5y/tS9XFSP7vWDfS+GUC5vs/YT7M5XDTUEM= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= -github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.1/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= +github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= +github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.1-0.20171106142849-4c012f6dcd95/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= -github.com/stefanberger/go-pkcs11uri 
v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8= -github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= @@ -814,41 +417,22 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= -github.com/testcontainers/testcontainers-go v0.15.0 h1:3Ex7PUGFv0b2bBsdOv6R42+SK2qoZnWBd21LvZYhUtQ= -github.com/testcontainers/testcontainers-go v0.15.0/go.mod h1:PkohMRH2X8Hib0IWtifVexDfLPVT+tb5E9hsf7cW12w= -github.com/tetratelabs/wazero v1.0.0-pre.7 h1:WI5N14XxoXw+ZWhcjSazJ6rEowhJbH/x8hglxC5gN7k= -github.com/tetratelabs/wazero v1.0.0-pre.7/go.mod h1:u8wrFmpdrykiFK0DFPiFm5a4+0RzsdmXYVtijBKqUVo= +github.com/testcontainers/testcontainers-go v0.18.0 h1:8RXrcIQv5xX/uBOSmZd297gzvA7F0yuRA37/918o7Yg= +github.com/testcontainers/testcontainers-go v0.18.0/go.mod h1:rLC7hR2SWRjJZZNrUYiTKvUXCziNxzZiYtz9icTWYNQ= +github.com/tetratelabs/wazero v1.0.0-pre.9 h1:2uVdi2bvTi/JQxG2cp3LRm2aRadd3nURn5jcfbvqZcw= +github.com/tetratelabs/wazero v1.0.0-pre.9/go.mod h1:wYx2gNRg8/WihJfSDxA1TIL8H+GkfLYm+bIfbblu9VQ= github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= -github.com/urfave/cli v0.0.0-20171014202726-7bc6a0acffa5/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= 
-github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= -github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= -github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= -github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= -github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1 h1:VOMT+81stJgXW3CpHyqHN3AXDYIMsx56mEFrB37Mb/E= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= github.com/xdg-go/stringprep v1.0.3 h1:kdwGpVNwPFtjs98xCGkHjQtGKh86rDcRZN17QEMCOIs= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= -github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xitongsys/parquet-go v1.5.1/go.mod h1:xUxwM8ELydxh4edHGegYq1pA8NnMKDx0K/GyB0o2bww= github.com/xitongsys/parquet-go v1.6.2 h1:MhCaXii4eqceKPu9BwrjLqyK10oX9WF+xGhwvwbw7xM= github.com/xitongsys/parquet-go v1.6.2/go.mod h1:IulAQyalCm0rPiZVNnCgm/PCL64X2tdSVGMQ/UeKqWA= @@ -856,48 +440,33 @@ github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod h1:HYhIKsdns7xz80OgkbgJYrtQY7FjHWHKH6cvN7+czGE= github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c h1:UDtocVeACpnwauljUbeHD9UOjjcvF5kLUHruww7VT9A= github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c/go.mod h1:qLb2Itmdcp7KPa5KZKvhE9U1q5bYSOmgeOckF/H2rQA= -github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= +github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod 
h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs= -github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA= -github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg= -go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= -go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= -go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg= -go.mongodb.org/mongo-driver v1.11.1 h1:QP0znIRTuL0jf1oBQoAoM0C6ZJfBK4kx0Uumtv1A7w8= -go.mongodb.org/mongo-driver v1.11.1/go.mod h1:s7p5vEtfbeR1gYi6pnj3c3/urpbLv2T5Sfd6Rp2HBB8= -go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= +go.mongodb.org/mongo-driver v1.11.2 h1:+1v2rDQUWNcGW7/7E0Jvdz51V38XXxJfhzbV17aNHCw= +go.mongodb.org/mongo-driver v1.11.2/go.mod h1:s7p5vEtfbeR1gYi6pnj3c3/urpbLv2T5Sfd6Rp2HBB8= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -golang.org/x/crypto v0.0.0-20171113213409-9f005a07e0d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +go.opentelemetry.io/proto/otlp v0.15.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= golang.org/x/crypto v0.0.0-20180723164146-c126467f60eb/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181009213950-7c1a557ab941/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d h1:sK3txAijHtOK88l68nt020reeT1ZdKLIYetKl95FzVY= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= @@ -925,6 +494,7 @@ golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= @@ -934,12 +504,9 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190310074541-c10a0554eabf/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -947,16 +514,10 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod 
h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -966,25 +527,29 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= -golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod 
h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 h1:nt+Q6cXKz4MosCSpnbMtqiQ8Oz0pxTef2B4Vca2lvfk= -golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg= +golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.5.0 h1:HuArIo48skDwlrvM3sEdHXElYslAMsf3KwRkkW4MC4s= +golang.org/x/oauth2 v0.5.0/go.mod h1:9/XBHVqLaWO3/BRHs5jbpYCnOZVjj5V0ndyaAM7KB4I= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -994,126 +559,82 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190812073006-9eafafc0a87e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191210023423-ac6580df4449/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200120151820-655fe14d7479/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200817155316-9781c653f443/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200922070232-aee5d888a860/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201117170446-d9b008d0a637/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201202213521-69691e467435/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= -golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= -golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.1.0 h1:xYY+Bajn2a7VBmTM5GikTmnK8ZuX8YgnQCqZpbBNtmA= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190308174544-00c44ba9c14f/go.mod h1:25r3+/G6/xytQM8iWZKq3Hn0kr0rgFKPUNVEL/dr3z4= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod 
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190706070813-72ffa07ba3db/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1131,23 +652,28 @@ golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200616133436-c1934b75d054/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200916195026-c9a70fc28ce3/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= +golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= 
+golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= -golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.3.0 h1:SrNbZl6ECOS1qFzgTdQfWXZM9XBkiA6tkFrH9YSTPHM= +golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -google.golang.org/api v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= @@ -1157,10 +683,15 @@ google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.109.0 h1:sW9hgHyX497PP5//NUM7nqfV8D0iDfBApqq7sOh1XR8= -google.golang.org/api v0.109.0/go.mod h1:2Ts0XTHNVWxypznxWOYUeI4g3WdP9Pk2Qk58+a/O9MY= +google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= +google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= +google.golang.org/api v0.110.0 h1:l+rh0KYUooe9JGbGVx71tbFo4SMbMTXK3I3ia2QSEeU= +google.golang.org/api v0.110.0/go.mod h1:7FC4Vvx1Mooxh8C5HWjzZHcavuS2f6pmJpZx60ca7iI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1169,13 +700,11 @@ google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCID google.golang.org/appengine v1.6.6/go.mod 
h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/cloud v0.0.0-20151119220103-975617b05ea8/go.mod h1:0H1ncTHf11KCFhTc/+EFRbzSCOZx+VUbRMk55Yv5MYk= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190522204451-c2c4e71fbf69/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= @@ -1184,39 +713,47 @@ google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvx google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200117163144-32f20d992d24/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto 
v0.0.0-20200527145253-8367513e4ece/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= -google.golang.org/genproto v0.0.0-20201110150050-8816d57aaa9a/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f h1:BWUVssLB0HVOSY78gIdvk1dTVYtT1y8SBWtPYuTJ/6w= -google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= -google.golang.org/grpc v0.0.0-20160317175043-d3ddb4469d5a/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= +google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= +google.golang.org/genproto v0.0.0-20220329172620-7be39ac1afc7/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= +google.golang.org/genproto v0.0.0-20230209215440-0dfe4f8abfcc h1:ijGwO+0vL2hJt5gaygqP2j6PfflOBrRot0IczKbmtio= +google.golang.org/genproto v0.0.0-20230209215440-0dfe4f8abfcc/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.52.3 h1:pf7sOysg4LdgBqduXveGKrcEwbStiK2rtfghdzlUYDQ= -google.golang.org/grpc v1.52.3/go.mod h1:pu6fVzoFb+NBYNAvQL08ic+lvB2IojljRYuun5vorUY= +google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= +google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= +google.golang.org/grpc v1.53.0 h1:LAv2ds7cmFV/XTS3XG1NneeENYrXGmorPxsBbptIjNc= 
+google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1230,22 +767,14 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20141024133853-64131543e789/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo= -gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q= gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= @@ -1254,76 +783,25 @@ gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLv gopkg.in/linkedin/goavro.v1 v1.0.5 h1:BJa69CDh0awSsLUmZ9+BowBdokpduDZSM9Zk8oKHfN4= gopkg.in/linkedin/goavro.v1 v1.0.5/go.mod h1:Aw5GdAbizjOEl0kAMHV9iHmA8reZzW/OKuJAl4Hb9F0= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= -gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/retry.v1 v1.0.3 h1:a9CArYczAVv6Qs6VGoLMio99GEs7kY9UzSF9+LD+iGs= gopkg.in/retry.v1 v1.0.3/go.mod h1:FJkXmWiMaAo7xB+xhvDF59zhfjDWyzmyAxiT4dB688g= -gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= 
-gopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= -gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= -gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= -gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= -gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= +gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.20.1/go.mod h1:KqwcCVogGxQY3nBlRpwt+wpAMF/KjaCc7RpywacvqUo= -k8s.io/api v0.20.4/go.mod h1:++lNL1AJMkDymriNniQsWRkMDzRaX2Y/POTUi8yvqYQ= -k8s.io/api v0.20.6/go.mod h1:X9e8Qag6JV/bL5G6bU8sdVRltWKmdHsFUGS3eVndqE8= -k8s.io/apimachinery v0.20.1/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= -k8s.io/apimachinery v0.20.4/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= -k8s.io/apimachinery v0.20.6/go.mod h1:ejZXtW1Ra6V1O5H8xPBGz+T3+4gfkTCeExAHKU57MAc= -k8s.io/apiserver v0.20.1/go.mod h1:ro5QHeQkgMS7ZGpvf4tSMx6bBOgPfE+f52KwvXfScaU= -k8s.io/apiserver v0.20.4/go.mod h1:Mc80thBKOyy7tbvFtB4kJv1kbdD0eIH8k8vianJcbFM= -k8s.io/apiserver v0.20.6/go.mod h1:QIJXNt6i6JB+0YQRNcS0hdRHJlMhflFmsBDeSgT1r8Q= -k8s.io/client-go v0.20.1/go.mod h1:/zcHdt1TeWSd5HoUe6elJmHSQ6uLLgp4bIJHVEuy+/Y= -k8s.io/client-go v0.20.4/go.mod h1:LiMv25ND1gLUdBeYxBIwKpkSC5IsozMMmOOeSJboP+k= -k8s.io/client-go v0.20.6/go.mod h1:nNQMnOvEUEsOzRRFIIkdmYOjAZrC8bgq0ExboWSU1I0= -k8s.io/code-generator v0.19.7/go.mod h1:lwEq3YnLYb/7uVXLorOJfxg+cUu2oihFhHZ0n9NIla0= -k8s.io/component-base v0.20.1/go.mod 
h1:guxkoJnNoh8LNrbtiQOlyp2Y2XFCZQmrcg2n/DeYNLk= -k8s.io/component-base v0.20.4/go.mod h1:t4p9EdiagbVCJKrQ1RsA5/V4rFQNDfRlevJajlGwgjI= -k8s.io/component-base v0.20.6/go.mod h1:6f1MPBAeI+mvuts3sIdtpjljHWBQ2cIy38oBIWMYnrM= -k8s.io/cri-api v0.17.3/go.mod h1:X1sbHmuXhwaHs9xxYffLqJogVsnI+f6cPRcgPel7ywM= -k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= -k8s.io/cri-api v0.20.4/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= -k8s.io/cri-api v0.20.6/go.mod h1:ew44AjNXwyn1s0U4xCKGodU7J1HzBeZ1MpGrpa5r8Yc= -k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= -k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= -k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= -k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= -k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/klog/v2 v2.4.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/kube-openapi v0.0.0-20200805222855-6aeccd4b50c6/go.mod h1:UuqjUnNftUyPE5H64/qeyjQoUZhGpeFDVdxjTeEVN2o= -k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd/go.mod h1:WOJ3KddDSol4tAGcJo0Tvi+dK12EcqSLqcWsryKMpfM= -k8s.io/kubernetes v1.13.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk= -k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= rsc.io/binaryregexp v0.2.0 h1:HfqmD5MEmC0zvwBuF187nq9mdnXjXsSivRiXN7SmRkE= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.14/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.15/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= -sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/structured-merge-diff/v4 v4.0.3/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= \ No newline at end of file diff --git a/sdks/go/examples/wasm/README.md b/sdks/go/examples/wasm/README.md index ad25ce87771c..e75f0895a53a 100644 --- a/sdks/go/examples/wasm/README.md +++ b/sdks/go/examples/wasm/README.md @@ -33,9 +33,9 @@ BEAM_HOME=$(pwd)/beam cd $BEAM_HOME/sdks ``` -## Direct Runner execution +## Local Runner execution -To execute this example on the direct runner: +To execute this example on the local runner: ```shell go run ./go/examples/wasm --output=$OUTPUT @@ -134,4 +134,4 @@ SUBNETWORK=regions/$REGION/subnetworks/ cd $BEAM_HOME/sdks go run ./go/examples/wasm --runner=dataflow --output=$OUTPUT --environment_config=apache/beam_go_sdk:latest \ --project=$PROJECT --region=$REGION --network=$NETWORK --subnetwork=$SUBNETWORK --staging_location=$STAGING -``` \ No newline at end of file +``` diff --git a/sdks/go/examples/wasm/wasm.go b/sdks/go/examples/wasm/wasm.go index 3d51d55bb57b..71cec3095f30 100644 --- 
a/sdks/go/examples/wasm/wasm.go +++ b/sdks/go/examples/wasm/wasm.go @@ -81,7 +81,7 @@ func main() { func run(ctx context.Context) error { p, s := beam.NewPipelineWithRoot() - in := beam.Create(s, "Ada", "Lovelace", "World", "Beam", "Senior López") + in := beam.Create(s, "Ada", "Lovelace", "World", "Beam", "Senior López", "Random unicorn emoji 🦄") out := beam.ParDo(s, &embeddedWasmFn{}, in) @@ -115,14 +115,14 @@ func (fn *embeddedWasmFn) Setup(ctx context.Context) error { // log to the console. _, err := fn.r.NewHostModuleBuilder("env"). NewFunctionBuilder().WithFunc(logString).Export("log"). - Instantiate(ctx, fn.r) + Instantiate(ctx) if err != nil { return fmt.Errorf("failed to instantiate host module: %w", err) } // Instantiate a WebAssembly module that imports the "log" function defined // in "env" and exports "memory" and functions we'll use in this example. - fn.mod, err = fn.r.InstantiateModuleFromBinary(ctx, greetWasm) + fn.mod, err = fn.r.Instantiate(ctx, greetWasm) if err != nil { return fmt.Errorf("failed to instantiate wasm module: %v", err) } diff --git a/sdks/go/pkg/beam/core/core.go b/sdks/go/pkg/beam/core/core.go index fd9e4e90f7d5..79b381385aee 100644 --- a/sdks/go/pkg/beam/core/core.go +++ b/sdks/go/pkg/beam/core/core.go @@ -27,7 +27,7 @@ const ( // SdkName is the human readable name of the SDK for UserAgents. SdkName = "Apache Beam SDK for Go" // SdkVersion is the current version of the SDK. - SdkVersion = "2.46.0.dev" + SdkVersion = "2.47.0.dev" // DefaultDockerImage represents the associated image for this release. DefaultDockerImage = "apache/beam_go_sdk:" + SdkVersion diff --git a/sdks/go/pkg/beam/core/graph/fn.go b/sdks/go/pkg/beam/core/graph/fn.go index 25b846370fb5..54cc02e07b3d 100644 --- a/sdks/go/pkg/beam/core/graph/fn.go +++ b/sdks/go/pkg/beam/core/graph/fn.go @@ -1099,13 +1099,13 @@ func validateSdfElementT(fn *Fn, name string, method *funcx.Fn, num int, startIn for i := 0; i < num; i++ { idx := i + startIndex - if method.Param[idx].T != processFn.Param[pos+i].T { + if got, want := method.Param[i+startIndex].T, processFn.Param[pos+i].T; got != want { err := errors.Errorf("mismatched element type in method %v, param %v. got: %v, want: %v", - name, idx, method.Param[idx].T, processFn.Param[pos+i].T) + name, idx, got, want) return errors.SetTopLevelMsgf(err, "Mismatched element type in method %v, "+ "parameter at index %v. Got: %v, Want: %v (from method %v). 
"+ "Ensure that element parameters in SDF methods have consistent types with element parameters in %v.", - name, idx, method.Param[idx].T, processFn.Param[pos+i].T, processElementName, processElementName) + name, idx, got, want, processElementName, processElementName) } } return nil @@ -1250,7 +1250,7 @@ func validateStatefulWatermarkSig(fn *Fn, numMainIn int) error { "Ensure that all restrictions in an SDF are the same type.", initialWatermarkEstimatorStateName, 1, method.Param[1].T, restT, createTrackerName) } - if err := validateSdfElementT(fn, restrictionSizeName, method, numMainIn, 2); err != nil { + if err := validateSdfElementT(fn, initialWatermarkEstimatorStateName, method, numMainIn, 2); err != nil { return err } diff --git a/sdks/go/pkg/beam/core/runtime/exec/datasink.go b/sdks/go/pkg/beam/core/runtime/exec/datasink.go index 36f2a5195ca2..6b39a2bb44f1 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/datasink.go +++ b/sdks/go/pkg/beam/core/runtime/exec/datasink.go @@ -77,7 +77,7 @@ func (n *DataSink) ProcessElement(ctx context.Context, value *FullValue, values return err } if err := n.enc.Encode(value, &b); err != nil { - return errors.WithContextf(err, "encoding element %v with coder %v", value, n.enc) + return errors.WithContextf(err, "encoding element %v with coder %v", value, n.Coder) } byteCount, err := n.w.Write(b.Bytes()) if err != nil { diff --git a/sdks/go/pkg/beam/core/runtime/xlangx/expand.go b/sdks/go/pkg/beam/core/runtime/xlangx/expand.go index 1a3040575c17..f48a26230419 100644 --- a/sdks/go/pkg/beam/core/runtime/xlangx/expand.go +++ b/sdks/go/pkg/beam/core/runtime/xlangx/expand.go @@ -184,10 +184,8 @@ func QueryExpansionService(ctx context.Context, p *HandlerParams) (*jobpb.Expans } if attempt.Count() == maxRetries { - if err != nil { - err = errors.Wrap(err, "expansion failed") - return nil, errors.WithContextf(err, "expanding transform with ExpansionRequest: %v", req) - } + err = errors.Wrap(err, "expansion failed") + return nil, errors.WithContextf(err, "expanding transform with ExpansionRequest: %v", req) } } if len(res.GetError()) != 0 { // ExpansionResponse includes an error. 
diff --git a/sdks/go/pkg/beam/io/mongodbio/read.go b/sdks/go/pkg/beam/io/mongodbio/read.go index 59d8cf6aef90..101d1f4af890 100644 --- a/sdks/go/pkg/beam/io/mongodbio/read.go +++ b/sdks/go/pkg/beam/io/mongodbio/read.go @@ -159,10 +159,12 @@ func inferProjection(t reflect.Type, tagKey string) bson.D { return projection } -func (fn *readFn) CreateInitialRestriction(_ []byte) idRangeRestriction { - ctx := context.Background() +func (fn *readFn) CreateInitialRestriction( + ctx context.Context, + _ []byte, +) (idRangeRestriction, error) { if err := fn.Setup(ctx); err != nil { - panic(err) + return idRangeRestriction{}, err } outerRange, err := findOuterIDRange(ctx, fn.collection, fn.filter) @@ -174,10 +176,10 @@ func (fn *readFn) CreateInitialRestriction(_ []byte) idRangeRestriction { fn.Database, fn.Collection, ) - return idRangeRestriction{} + return idRangeRestriction{}, nil } - panic(err) + return idRangeRestriction{}, err } return newIDRangeRestriction( @@ -185,7 +187,7 @@ func (fn *readFn) CreateInitialRestriction(_ []byte) idRangeRestriction { fn.collection, outerRange, fn.filter, - ) + ), nil } func findOuterIDRange( @@ -213,22 +215,25 @@ func findOuterIDRange( return outerRange, nil } -func (fn *readFn) SplitRestriction(_ []byte, rest idRangeRestriction) []idRangeRestriction { +func (fn *readFn) SplitRestriction( + ctx context.Context, + _ []byte, + rest idRangeRestriction, +) ([]idRangeRestriction, error) { if rest.Count == 0 { - return []idRangeRestriction{rest} + return []idRangeRestriction{rest}, nil } - ctx := context.Background() if err := fn.Setup(ctx); err != nil { - panic(err) + return nil, err } splits, err := rest.SizedSplits(ctx, fn.collection, fn.BundleSize, fn.BucketAuto) if err != nil { - panic(err) + return nil, err } - return splits + return splits, nil } func (fn *readFn) CreateTracker(rest idRangeRestriction) *sdf.LockRTracker { diff --git a/sdks/go/pkg/beam/io/textio/textio.go b/sdks/go/pkg/beam/io/textio/textio.go index a6f909aea1a6..ed8be0a42b25 100644 --- a/sdks/go/pkg/beam/io/textio/textio.go +++ b/sdks/go/pkg/beam/io/textio/textio.go @@ -34,7 +34,6 @@ import ( func init() { beam.RegisterType(reflect.TypeOf((*readFn)(nil)).Elem()) - beam.RegisterFunction(sizeFn) beam.RegisterType(reflect.TypeOf((*writeFileFn)(nil)).Elem()) beam.RegisterFunction(expandFn) } @@ -82,8 +81,7 @@ func ReadAllSdf(s beam.Scope, col beam.PCollection) beam.PCollection { // into separate bundles. func read(s beam.Scope, col beam.PCollection) beam.PCollection { files := beam.ParDo(s, expandFn, col) - sized := beam.ParDo(s, sizeFn, files) - return beam.ParDo(s, &readFn{}, sized) + return beam.ParDo(s, &readFn{}, files) } // expandFn expands a glob pattern into all matching file names. @@ -108,36 +106,29 @@ func expandFn(ctx context.Context, glob string, emit func(string)) error { return nil } -// sizeFn pairs a filename with the size of that file in bytes. -// TODO(https://github.com/apache/beam/issues/20607): Once CreateInitialRestriction supports Context params and -// error return values, this can be done in readSdfFn.CreateInitialRestriction. -func sizeFn(ctx context.Context, filename string) (string, int64, error) { +// readFn reads individual lines from a text file. Implemented as an SDF +// to allow splitting within a file. +type readFn struct { +} + +// CreateInitialRestriction creates an offset range restriction representing +// the file's size in bytes. 
+func (fn *readFn) CreateInitialRestriction(ctx context.Context, filename string) (offsetrange.Restriction, error) { fs, err := filesystem.New(ctx, filename) if err != nil { - return "", -1, err + return offsetrange.Restriction{}, err } defer fs.Close() size, err := fs.Size(ctx, filename) if err != nil { - return "", -1, err + return offsetrange.Restriction{}, err } - return filename, size, nil -} -// readFn reads individual lines from a text file, given a filename and a -// size in bytes for that file. Implemented as an SDF to allow splitting -// within a file. -type readFn struct { -} - -// CreateInitialRestriction creates an offset range restriction representing -// the file, using the paired size rather than fetching the file's size. -func (fn *readFn) CreateInitialRestriction(_ string, size int64) offsetrange.Restriction { return offsetrange.Restriction{ Start: 0, End: size, - } + }, nil } const ( @@ -150,7 +141,7 @@ const ( // SplitRestriction splits each file restriction into blocks of a predeterined // size, with some checks to avoid having small remainders. -func (fn *readFn) SplitRestriction(_ string, _ int64, rest offsetrange.Restriction) []offsetrange.Restriction { +func (fn *readFn) SplitRestriction(_ string, rest offsetrange.Restriction) []offsetrange.Restriction { splits := rest.SizedSplits(blockSize) numSplits := len(splits) if numSplits > 1 { @@ -165,7 +156,7 @@ func (fn *readFn) SplitRestriction(_ string, _ int64, rest offsetrange.Restricti } // Size returns the size of each restriction as its range. -func (fn *readFn) RestrictionSize(_ string, _ int64, rest offsetrange.Restriction) float64 { +func (fn *readFn) RestrictionSize(_ string, rest offsetrange.Restriction) float64 { return rest.Size() } @@ -183,7 +174,7 @@ func (fn *readFn) CreateTracker(rest offsetrange.Restriction) *sdf.LockRTracker // begin within the restriction and past the restriction (those are entirely // output, including the portion outside the restriction). In some cases a // valid restriction might not output any lines. -func (fn *readFn) ProcessElement(ctx context.Context, rt *sdf.LockRTracker, filename string, _ int64, emit func(string)) error { +func (fn *readFn) ProcessElement(ctx context.Context, rt *sdf.LockRTracker, filename string, emit func(string)) error { log.Infof(ctx, "Reading from %v", filename) fs, err := filesystem.New(ctx, filename) diff --git a/sdks/go/pkg/beam/runners/prism/README.md b/sdks/go/pkg/beam/runners/prism/README.md index 1e91a3d64f8f..0fc6e6e68416 100644 --- a/sdks/go/pkg/beam/runners/prism/README.md +++ b/sdks/go/pkg/beam/runners/prism/README.md @@ -29,6 +29,7 @@ It's intended to replace the current Go Direct runner, but also be for general single machine use. For Go SDK users: + - `import "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism"` - Short term: set runner to "prism" to use it, or invoke directly. - Medium term: switch the default from "direct" to "prism". - Long term: alias "direct" to "prism", and delete legacy Go direct runner. 
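The prism README addition above tells Go SDK users to import the prism package and select the runner by the name "prism". A hedged sketch of what that could look like for a pipeline driven through beamx, assuming the import registers the runner under that name (the registration mechanics at this commit are an assumption, not shown here):

```go
package main

import (
	"context"
	"flag"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	// Assumed: importing the prism package registers it as a selectable runner.
	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/debug"
)

func main() {
	flag.Parse()
	beam.Init()

	p, s := beam.NewPipelineWithRoot()
	words := beam.Create(s, "hello", "prism", "runner")
	debug.Print(s, words)

	// Invoke with: go run . --runner=prism
	if err := beamx.Run(context.Background(), p); err != nil {
		panic(err)
	}
}
```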
diff --git a/sdks/go/pkg/beam/runners/prism/internal/coders_test.go b/sdks/go/pkg/beam/runners/prism/internal/coders_test.go index ad6e36496286..c6e32c895fe6 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/coders_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/coders_test.go @@ -334,7 +334,7 @@ func Test_pullDecoder(t *testing.T) { ComponentCoderIds: []string{"elm"}, }, map[string]*pipepb.Coder{ - "elm": &pipepb.Coder{ + "elm": { Spec: &pipepb.FunctionSpec{ Urn: urns.CoderVarInt, }, @@ -350,12 +350,12 @@ func Test_pullDecoder(t *testing.T) { ComponentCoderIds: []string{"key", "value"}, }, map[string]*pipepb.Coder{ - "key": &pipepb.Coder{ + "key": { Spec: &pipepb.FunctionSpec{ Urn: urns.CoderVarInt, }, }, - "value": &pipepb.Coder{ + "value": { Spec: &pipepb.FunctionSpec{ Urn: urns.CoderBool, }, diff --git a/sdks/go/pkg/beam/runners/prism/internal/config/config_test.go b/sdks/go/pkg/beam/runners/prism/internal/config/config_test.go index 59fcdd56d65f..4c2642e78f99 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/config/config_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/config/config_test.go @@ -54,7 +54,7 @@ func TestHandlerRegistry(t *testing.T) { type spotCheck struct { v, h string - want interface{} + want any } tests := []struct { name string diff --git a/sdks/go/pkg/beam/runners/prism/internal/engine/data.go b/sdks/go/pkg/beam/runners/prism/internal/engine/data.go new file mode 100644 index 000000000000..6fc192ac83be --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/engine/data.go @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package engine + +// TentativeData is where data for in progress bundles is put +// until the bundle executes successfully. +type TentativeData struct { + Raw map[string][][]byte +} + +// WriteData adds data to a given global collectionID. +func (d *TentativeData) WriteData(colID string, data []byte) { + if d.Raw == nil { + d.Raw = map[string][][]byte{} + } + d.Raw[colID] = append(d.Raw[colID], data) +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go b/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go new file mode 100644 index 000000000000..aeabc81b8123 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go @@ -0,0 +1,675 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package engine handles the operational components of a runner, to +// track elements, watermarks, timers, triggers etc +package engine + +import ( + "bytes" + "container/heap" + "context" + "fmt" + "io" + "sync" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" + "golang.org/x/exp/slog" +) + +type element struct { + window typex.Window + timestamp mtime.Time + pane typex.PaneInfo + + elmBytes []byte +} + +type elements struct { + es []element + minTimestamp mtime.Time +} + +type PColInfo struct { + GlobalID string + WDec exec.WindowDecoder + WEnc exec.WindowEncoder + EDec func(io.Reader) []byte +} + +// ToData recodes the elements with their approprate windowed value header. +func (es elements) ToData(info PColInfo) [][]byte { + var ret [][]byte + for _, e := range es.es { + var buf bytes.Buffer + exec.EncodeWindowedValueHeader(info.WEnc, []typex.Window{e.window}, e.timestamp, e.pane, &buf) + buf.Write(e.elmBytes) + ret = append(ret, buf.Bytes()) + } + return ret +} + +// elementHeap orders elements based on their timestamps +// so we can always find the minimum timestamp of pending elements. +type elementHeap []element + +func (h elementHeap) Len() int { return len(h) } +func (h elementHeap) Less(i, j int) bool { return h[i].timestamp < h[j].timestamp } +func (h elementHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *elementHeap) Push(x any) { + // Push and Pop use pointer receivers because they modify the slice's length, + // not just its contents. + *h = append(*h, x.(element)) +} + +func (h *elementHeap) Pop() any { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +type Config struct { + // MaxBundleSize caps the number of elements permitted in a bundle. + // 0 or less means this is ignored. + MaxBundleSize int +} + +// ElementManager handles elements, watermarks, and related errata to determine +// if a stage is able to be executed. It is the core execution engine of Prism. +// +// Essentially, it needs to track the current watermarks for each PCollection +// and transform/stage. But it's tricky, since the watermarks for the +// PCollections are always relative to transforms/stages. +// +// Key parts: +// +// - The parallel input's PCollection's watermark is relative to committed consumed +// elements. That is, the input elements consumed by the transform after a successful +// bundle, can advance the watermark, based on the minimum of their elements. +// - An output PCollection's watermark is relative to its producing transform, +// which relates to *all of it's outputs*. +// +// This means that a PCollection's watermark is the minimum of all it's consuming transforms. +// +// So, the watermark manager needs to track: +// Pending Elements for each stage, along with their windows and timestamps. +// Each transform's view of the watermarks for the PCollections. 
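// For example (illustrative numbers, not taken from the source): if a stage's
// input PCollection watermark sits at t=5 while its oldest pending element has
// timestamp t=3, the stage's input watermark is min(3, 5) = 3; once that
// element is committed the pending minimum goes away, the input watermark can
// advance to 5, and downstream PCollection watermarks then follow this stage's
// output watermark.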
+// +// Watermarks are advanced based on consumed input, except if the stage produces residuals. +type ElementManager struct { + config Config + + stages map[string]*stageState // The state for each stage. + + consumers map[string][]string // Map from pcollectionID to stageIDs that consumes them as primary input. + sideConsumers map[string][]string // Map from pcollectionID to stageIDs that consumes them as side input. + + pcolParents map[string]string // Map from pcollectionID to stageIDs that produce the pcollection. + + refreshCond sync.Cond // refreshCond protects the following fields with it's lock, and unblocks bundle scheduling. + inprogressBundles set[string] // Active bundleIDs + watermarkRefreshes set[string] // Scheduled stageID watermark refreshes + + pendingElements sync.WaitGroup // pendingElements counts all unprocessed elements in a job. Jobs with no pending elements terminate successfully. +} + +func NewElementManager(config Config) *ElementManager { + return &ElementManager{ + config: config, + stages: map[string]*stageState{}, + consumers: map[string][]string{}, + sideConsumers: map[string][]string{}, + pcolParents: map[string]string{}, + watermarkRefreshes: set[string]{}, + inprogressBundles: set[string]{}, + refreshCond: sync.Cond{L: &sync.Mutex{}}, + } +} + +// AddStage adds a stage to this element manager, connecting it's PCollections and +// nodes to the watermark propagation graph. +func (em *ElementManager) AddStage(ID string, inputIDs, sides, outputIDs []string) { + slog.Debug("AddStage", slog.String("ID", ID), slog.Any("inputs", inputIDs), slog.Any("sides", sides), slog.Any("outputs", outputIDs)) + ss := makeStageState(ID, inputIDs, sides, outputIDs) + + em.stages[ss.ID] = ss + for _, outputIDs := range ss.outputIDs { + em.pcolParents[outputIDs] = ss.ID + } + for _, input := range inputIDs { + em.consumers[input] = append(em.consumers[input], ss.ID) + } + for _, side := range ss.sides { + em.sideConsumers[side] = append(em.sideConsumers[side], ss.ID) + } +} + +// StageAggregates marks the given stage as an aggregation, which +// means elements will only be processed based on windowing strategies. +func (em *ElementManager) StageAggregates(ID string) { + em.stages[ID].aggregate = true +} + +// Impulse marks and initializes the given stage as an impulse which +// is a root transform that starts processing. +func (em *ElementManager) Impulse(stageID string) { + stage := em.stages[stageID] + newPending := []element{{ + window: window.GlobalWindow{}, + timestamp: mtime.MinTimestamp, + pane: typex.NoFiringPane(), + elmBytes: []byte{0}, // Represents an encoded 0 length byte slice. + }} + + consumers := em.consumers[stage.outputIDs[0]] + slog.Debug("Impulse", slog.String("stageID", stageID), slog.Any("outputs", stage.outputIDs), slog.Any("consumers", consumers)) + + em.pendingElements.Add(len(consumers)) + for _, sID := range consumers { + consumer := em.stages[sID] + consumer.AddPending(newPending) + } + refreshes := stage.updateWatermarks(mtime.MaxTimestamp, mtime.MaxTimestamp, em) + em.addRefreshes(refreshes) +} + +type RunBundle struct { + StageID string + BundleID string + Watermark mtime.Time +} + +func (rb RunBundle) LogValue() slog.Value { + return slog.GroupValue( + slog.String("ID", rb.BundleID), + slog.String("stage", rb.StageID), + slog.Time("watermark", rb.Watermark.ToTime())) +} + +// Bundles is the core execution loop. It produces a sequences of bundles able to be executed. 
+// The returned channel is closed when the context is canceled, or there are no pending elements +// remaining. +func (em *ElementManager) Bundles(ctx context.Context, nextBundID func() string) <-chan RunBundle { + runStageCh := make(chan RunBundle) + ctx, cancelFn := context.WithCancel(ctx) + go func() { + em.pendingElements.Wait() + slog.Info("no more pending elements: terminating pipeline") + cancelFn() + // Ensure the watermark evaluation goroutine exits. + em.refreshCond.Broadcast() + }() + // Watermark evaluation goroutine. + go func() { + defer close(runStageCh) + for { + em.refreshCond.L.Lock() + // If there are no watermark refreshes available, we wait until there are. + for len(em.watermarkRefreshes) == 0 { + // Check to see if we must exit + select { + case <-ctx.Done(): + em.refreshCond.L.Unlock() + return + default: + } + em.refreshCond.Wait() // until watermarks may have changed. + } + + // We know there is some work we can do that may advance the watermarks, + // refresh them, and see which stages have advanced. + advanced := em.refreshWatermarks() + + // Check each advanced stage, to see if it's able to execute based on the watermark. + for stageID := range advanced { + ss := em.stages[stageID] + watermark, ready := ss.bundleReady(em) + if ready { + bundleID, ok := ss.startBundle(watermark, nextBundID) + if !ok { + continue + } + rb := RunBundle{StageID: stageID, BundleID: bundleID, Watermark: watermark} + + em.inprogressBundles.insert(rb.BundleID) + em.refreshCond.L.Unlock() + + select { + case <-ctx.Done(): + return + case runStageCh <- rb: + } + em.refreshCond.L.Lock() + } + } + em.refreshCond.L.Unlock() + } + }() + return runStageCh +} + +// InputForBundle returns pre-allocated data for the given bundle, encoding the elements using +// the PCollection's coders. +func (em *ElementManager) InputForBundle(rb RunBundle, info PColInfo) [][]byte { + ss := em.stages[rb.StageID] + ss.mu.Lock() + defer ss.mu.Unlock() + es := ss.inprogress[rb.BundleID] + return es.ToData(info) +} + +// PersistBundle uses the tentative bundle output to update the watermarks for the stage. +// Each stage has two monotonically increasing watermarks, the input watermark, and the output +// watermark. +// +// MAX(CurrentInputWatermark, MIN(PendingElements, InputPCollectionWatermarks) +// MAX(CurrentOutputWatermark, MIN(InputWatermark, WatermarkHolds)) +// +// PersistBundle takes in the stage ID, ID of the bundle associated with the pending +// input elements, and the committed output elements. +func (em *ElementManager) PersistBundle(rb RunBundle, col2Coders map[string]PColInfo, d TentativeData, inputInfo PColInfo, residuals [][]byte, estimatedOWM map[string]mtime.Time) { + stage := em.stages[rb.StageID] + for output, data := range d.Raw { + info := col2Coders[output] + var newPending []element + slog.Debug("PersistBundle: processing output", "bundle", rb, slog.String("output", output)) + for _, datum := range data { + buf := bytes.NewBuffer(datum) + if len(datum) == 0 { + panic(fmt.Sprintf("zero length data for %v: ", output)) + } + for { + var rawBytes bytes.Buffer + tee := io.TeeReader(buf, &rawBytes) + ws, et, pn, err := exec.DecodeWindowedValueHeader(info.WDec, tee) + if err != nil { + if err == io.EOF { + break + } + slog.Error("PersistBundle: error decoding watermarks", err, "bundle", rb, slog.String("output", output)) + panic("error decoding watermarks") + } + // TODO: Optimize unnecessary copies. This is doubleteeing. 
+ elmBytes := info.EDec(tee) + for _, w := range ws { + newPending = append(newPending, + element{ + window: w, + timestamp: et, + pane: pn, + elmBytes: elmBytes, + }) + } + } + } + consumers := em.consumers[output] + slog.Debug("PersistBundle: bundle has downstream consumers.", "bundle", rb, slog.Int("newPending", len(newPending)), "consumers", consumers) + for _, sID := range consumers { + em.pendingElements.Add(len(newPending)) + consumer := em.stages[sID] + consumer.AddPending(newPending) + } + } + + // Return unprocessed to this stage's pending + var unprocessedElements []element + for _, residual := range residuals { + buf := bytes.NewBuffer(residual) + ws, et, pn, err := exec.DecodeWindowedValueHeader(inputInfo.WDec, buf) + if err != nil { + if err == io.EOF { + break + } + slog.Error("PersistBundle: error decoding residual header", err, "bundle", rb) + panic("error decoding residual header") + } + + for _, w := range ws { + unprocessedElements = append(unprocessedElements, + element{ + window: w, + timestamp: et, + pane: pn, + elmBytes: buf.Bytes(), + }) + } + } + // Add unprocessed back to the pending stack. + if len(unprocessedElements) > 0 { + em.pendingElements.Add(len(unprocessedElements)) + stage.AddPending(unprocessedElements) + } + // Clear out the inprogress elements associated with the completed bundle. + // Must be done after adding the new pending elements to avoid an incorrect + // watermark advancement. + stage.mu.Lock() + completed := stage.inprogress[rb.BundleID] + em.pendingElements.Add(-len(completed.es)) + delete(stage.inprogress, rb.BundleID) + // If there are estimated output watermarks, set the estimated + // output watermark for the stage. + if len(estimatedOWM) > 0 { + estimate := mtime.MaxTimestamp + for _, t := range estimatedOWM { + estimate = mtime.Min(estimate, t) + } + stage.estimatedOutput = estimate + } + stage.mu.Unlock() + + // TODO support state/timer watermark holds. + em.addRefreshAndClearBundle(stage.ID, rb.BundleID) +} + +func (em *ElementManager) addRefreshes(stages set[string]) { + em.refreshCond.L.Lock() + defer em.refreshCond.L.Unlock() + em.watermarkRefreshes.merge(stages) + em.refreshCond.Broadcast() +} + +func (em *ElementManager) addRefreshAndClearBundle(stageID, bundID string) { + em.refreshCond.L.Lock() + defer em.refreshCond.L.Unlock() + delete(em.inprogressBundles, bundID) + em.watermarkRefreshes.insert(stageID) + em.refreshCond.Broadcast() +} + +// refreshWatermarks incrementally refreshes the watermarks, and returns the set of stages where the +// the watermark may have advanced. +// Must be called while holding em.refreshCond.L +func (em *ElementManager) refreshWatermarks() set[string] { + // Need to have at least one refresh signal. + nextUpdates := set[string]{} + refreshed := set[string]{} + var i int + for stageID := range em.watermarkRefreshes { + // clear out old one. + em.watermarkRefreshes.remove(stageID) + ss := em.stages[stageID] + refreshed.insert(stageID) + + dummyStateHold := mtime.MaxTimestamp + + refreshes := ss.updateWatermarks(ss.minPendingTimestamp(), dummyStateHold, em) + nextUpdates.merge(refreshes) + // cap refreshes incrementally. 
+ if i < 10 { + i++ + } else { + break + } + } + em.watermarkRefreshes.merge(nextUpdates) + return refreshed +} + +type set[K comparable] map[K]struct{} + +func (s set[K]) remove(k K) { + delete(s, k) +} + +func (s set[K]) insert(k K) { + s[k] = struct{}{} +} + +func (s set[K]) merge(o set[K]) { + for k := range o { + s.insert(k) + } +} + +// stageState is the internal watermark and input tracking for a stage. +type stageState struct { + ID string + inputID string // PCollection ID of the parallel input + outputIDs []string // PCollection IDs of outputs to update consumers. + sides []string // PCollection IDs of side inputs that can block execution. + + // Special handling bits + aggregate bool // whether this state needs to block for aggregation. + strat winStrat // Windowing Strategy for aggregation fireings. + + mu sync.Mutex + upstreamWatermarks sync.Map // watermark set from inputPCollection's parent. + input mtime.Time // input watermark for the parallel input. + output mtime.Time // Output watermark for the whole stage + estimatedOutput mtime.Time // Estimated watermark output from DoFns + + pending elementHeap // pending input elements for this stage that are to be processesd + inprogress map[string]elements // inprogress elements by active bundles, keyed by bundle +} + +// makeStageState produces an initialized stageState. +func makeStageState(ID string, inputIDs, sides, outputIDs []string) *stageState { + ss := &stageState{ + ID: ID, + outputIDs: outputIDs, + sides: sides, + strat: defaultStrat{}, + + input: mtime.MinTimestamp, + output: mtime.MinTimestamp, + estimatedOutput: mtime.MinTimestamp, + } + + // Initialize the upstream watermarks to minTime. + for _, pcol := range inputIDs { + ss.upstreamWatermarks.Store(pcol, mtime.MinTimestamp) + } + if len(inputIDs) == 1 { + ss.inputID = inputIDs[0] + } + return ss +} + +// AddPending adds elements to the pending heap. +func (ss *stageState) AddPending(newPending []element) { + ss.mu.Lock() + defer ss.mu.Unlock() + ss.pending = append(ss.pending, newPending...) + heap.Init(&ss.pending) +} + +// updateUpstreamWatermark is for the parent of the input pcollection +// to call, to update downstream stages with it's current watermark. +// This avoids downstream stages inverting lock orderings from +// calling their parent stage to get their input pcollection's watermark. +func (ss *stageState) updateUpstreamWatermark(pcol string, upstream mtime.Time) { + // A stage will only have a single upstream watermark, so + // we simply set this. + ss.upstreamWatermarks.Store(pcol, upstream) +} + +// UpstreamWatermark gets the minimum value of all upstream watermarks. +func (ss *stageState) UpstreamWatermark() (string, mtime.Time) { + upstream := mtime.MaxTimestamp + var name string + ss.upstreamWatermarks.Range(func(key, val any) bool { + // Use <= to ensure if available we get a name. + if val.(mtime.Time) <= upstream { + upstream = val.(mtime.Time) + name = key.(string) + } + return true + }) + return name, upstream +} + +// InputWatermark gets the current input watermark for the stage. +func (ss *stageState) InputWatermark() mtime.Time { + ss.mu.Lock() + defer ss.mu.Unlock() + return ss.input +} + +// OutputWatermark gets the current output watermark for the stage. +func (ss *stageState) OutputWatermark() mtime.Time { + ss.mu.Lock() + defer ss.mu.Unlock() + return ss.output +} + +// startBundle initializes a bundle with elements if possible. 
+// A bundle only starts if there are elements at all, and if it's +// an aggregation stage, if the windowing stratgy allows it. +func (ss *stageState) startBundle(watermark mtime.Time, genBundID func() string) (string, bool) { + defer func() { + if e := recover(); e != nil { + panic(fmt.Sprintf("generating bundle for stage %v at %v panicked\n%v", ss.ID, watermark, e)) + } + }() + ss.mu.Lock() + defer ss.mu.Unlock() + + var toProcess, notYet []element + for _, e := range ss.pending { + if !ss.aggregate || ss.aggregate && ss.strat.EarliestCompletion(e.window) <= watermark { + toProcess = append(toProcess, e) + } else { + notYet = append(notYet, e) + } + } + ss.pending = notYet + heap.Init(&ss.pending) + + if len(toProcess) == 0 { + return "", false + } + // Is THIS is where basic splits should happen/per element processing? + es := elements{ + es: toProcess, + minTimestamp: toProcess[0].timestamp, + } + if ss.inprogress == nil { + ss.inprogress = make(map[string]elements) + } + bundID := genBundID() + ss.inprogress[bundID] = es + return bundID, true +} + +// minimumPendingTimestamp returns the minimum pending timestamp from all pending elements, +// including in progress ones. +// +// Assumes that the pending heap is initialized if it's not empty. +func (ss *stageState) minPendingTimestamp() mtime.Time { + ss.mu.Lock() + defer ss.mu.Unlock() + minPending := mtime.MaxTimestamp + if len(ss.pending) != 0 { + minPending = ss.pending[0].timestamp + } + for _, es := range ss.inprogress { + minPending = mtime.Min(minPending, es.minTimestamp) + } + return minPending +} + +func (ss *stageState) String() string { + pcol, up := ss.UpstreamWatermark() + return fmt.Sprintf("[%v] IN: %v OUT: %v UP: %q %v, aggregation: %v", ss.ID, ss.input, ss.output, pcol, up, ss.aggregate) +} + +// updateWatermarks performs the following operations: +// +// Watermark_In' = MAX(Watermark_In, MIN(U(TS_Pending), U(Watermark_InputPCollection))) +// Watermark_Out' = MAX(Watermark_Out, MIN(Watermark_In', U(StateHold))) +// Watermark_PCollection = Watermark_Out_ProducingPTransform +func (ss *stageState) updateWatermarks(minPending, minStateHold mtime.Time, em *ElementManager) set[string] { + ss.mu.Lock() + defer ss.mu.Unlock() + + // PCollection watermarks are based on their parents's output watermark. + _, newIn := ss.UpstreamWatermark() + + // Set the input watermark based on the minimum pending elements, + // and the current input pcollection watermark. + if minPending < newIn { + newIn = minPending + } + + // If bigger, advance the input watermark. + if newIn > ss.input { + ss.input = newIn + } + // The output starts with the new input as the basis. + newOut := ss.input + + // If we're given an estimate, and it's further ahead, we use that instead. + if ss.estimatedOutput > ss.output { + newOut = ss.estimatedOutput + } + + // We adjust based on the minimum state hold. + if minStateHold < newOut { + newOut = minStateHold + } + refreshes := set[string]{} + // If bigger, advance the output watermark + if newOut > ss.output { + ss.output = newOut + for _, outputCol := range ss.outputIDs { + consumers := em.consumers[outputCol] + + for _, sID := range consumers { + em.stages[sID].updateUpstreamWatermark(outputCol, ss.output) + refreshes.insert(sID) + } + // Inform side input consumers, but don't update the upstream watermark. 
+ for _, sID := range em.sideConsumers[outputCol] { + refreshes.insert(sID) + } + } + } + return refreshes +} + +// bundleReady returns the maximum allowed watermark for this stage, and whether +// it's permitted to execute by side inputs. +func (ss *stageState) bundleReady(em *ElementManager) (mtime.Time, bool) { + ss.mu.Lock() + defer ss.mu.Unlock() + // If the upstream watermark and the input watermark are the same, + // then we can't yet process this stage. + inputW := ss.input + _, upstreamW := ss.UpstreamWatermark() + if inputW == upstreamW { + slog.Debug("bundleReady: insufficient upstream watermark", + slog.String("stage", ss.ID), + slog.Group("watermark", + slog.Any("upstream", upstreamW), + slog.Any("input", inputW))) + return mtime.MinTimestamp, false + } + ready := true + for _, side := range ss.sides { + pID := em.pcolParents[side] + parent := em.stages[pID] + ow := parent.OutputWatermark() + if upstreamW > ow { + ready = false + } + } + return upstreamW, ready +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager_test.go b/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager_test.go new file mode 100644 index 000000000000..ddfdd5b8816c --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager_test.go @@ -0,0 +1,516 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
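// For illustration only: a minimal standalone sketch of the advancement rule
// that updateWatermarks applies above, written as a pure function over
// mtime.Time. The name advance and its parameters are invented for the sketch;
// it omits the estimatedOutput refinement and the downstream refresh
// bookkeeping performed by the real method.
package engine

import "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime"

// advance computes
//
//	in'  = MAX(in,  MIN(minPending, upstream))
//	out' = MAX(out, MIN(in', minStateHold))
//
// so watermarks never regress, while pending elements and state holds keep the
// output watermark from advancing past work that still has to be emitted.
func advance(curIn, curOut, upstream, minPending, minStateHold mtime.Time) (newIn, newOut mtime.Time) {
	newIn = upstream
	if minPending < newIn {
		newIn = minPending
	}
	if newIn < curIn {
		newIn = curIn // Input watermark never regresses.
	}
	newOut = newIn
	if minStateHold < newOut {
		newOut = minStateHold
	}
	if newOut < curOut {
		newOut = curOut // Output watermark never regresses.
	}
	return newIn, newOut
}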
+ +package engine + +import ( + "container/heap" + "context" + "fmt" + "io" + "testing" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" + "github.com/google/go-cmp/cmp" +) + +func TestElementHeap(t *testing.T) { + elements := elementHeap{ + element{timestamp: mtime.EndOfGlobalWindowTime}, + element{timestamp: mtime.MaxTimestamp}, + element{timestamp: 3}, + element{timestamp: mtime.MinTimestamp}, + element{timestamp: 2}, + element{timestamp: mtime.ZeroTimestamp}, + element{timestamp: 1}, + } + heap.Init(&elements) + heap.Push(&elements, element{timestamp: 4}) + + if got, want := elements.Len(), len(elements); got != want { + t.Errorf("elements.Len() = %v, want %v", got, want) + } + if got, want := elements[0].timestamp, mtime.MinTimestamp; got != want { + t.Errorf("elements[0].timestamp = %v, want %v", got, want) + } + + wanted := []mtime.Time{mtime.MinTimestamp, mtime.ZeroTimestamp, 1, 2, 3, 4, mtime.EndOfGlobalWindowTime, mtime.MaxTimestamp} + for i, want := range wanted { + if got := heap.Pop(&elements).(element).timestamp; got != want { + t.Errorf("[%d] heap.Pop(&elements).(element).timestamp = %v, want %v", i, got, want) + } + } +} + +func TestStageState_minPendingTimestamp(t *testing.T) { + + newState := func() *stageState { + return makeStageState("test", []string{"testInput"}, nil, []string{"testOutput"}) + } + t.Run("noElements", func(t *testing.T) { + ss := newState() + got := ss.minPendingTimestamp() + want := mtime.MaxTimestamp + if got != want { + t.Errorf("ss.minPendingTimestamp() = %v, want %v", got, want) + } + }) + + want := mtime.ZeroTimestamp - 20 + t.Run("onlyPending", func(t *testing.T) { + ss := newState() + ss.pending = elementHeap{ + element{timestamp: mtime.EndOfGlobalWindowTime}, + element{timestamp: mtime.MaxTimestamp}, + element{timestamp: 3}, + element{timestamp: want}, + element{timestamp: 2}, + element{timestamp: mtime.ZeroTimestamp}, + element{timestamp: 1}, + } + heap.Init(&ss.pending) + + got := ss.minPendingTimestamp() + if got != want { + t.Errorf("ss.minPendingTimestamp() = %v, want %v", got, want) + } + }) + + t.Run("onlyInProgress", func(t *testing.T) { + ss := newState() + ss.inprogress = map[string]elements{ + "a": { + es: []element{ + {timestamp: mtime.EndOfGlobalWindowTime}, + {timestamp: mtime.MaxTimestamp}, + }, + minTimestamp: mtime.EndOfGlobalWindowTime, + }, + "b": { + es: []element{ + {timestamp: 3}, + {timestamp: want}, + {timestamp: 2}, + {timestamp: 1}, + }, + minTimestamp: want, + }, + "c": { + es: []element{ + {timestamp: mtime.ZeroTimestamp}, + }, + minTimestamp: mtime.ZeroTimestamp, + }, + } + + got := ss.minPendingTimestamp() + if got != want { + t.Errorf("ss.minPendingTimestamp() = %v, want %v", got, want) + } + }) + + t.Run("minInPending", func(t *testing.T) { + ss := newState() + ss.pending = elementHeap{ + {timestamp: 3}, + {timestamp: want}, + {timestamp: 2}, + {timestamp: 1}, + } + heap.Init(&ss.pending) + ss.inprogress = map[string]elements{ + "a": { + es: []element{ + {timestamp: mtime.EndOfGlobalWindowTime}, + {timestamp: mtime.MaxTimestamp}, + }, + minTimestamp: mtime.EndOfGlobalWindowTime, + }, + "c": { + es: []element{ + {timestamp: mtime.ZeroTimestamp}, + }, + minTimestamp: mtime.ZeroTimestamp, + }, + } + + got := ss.minPendingTimestamp() + if got != want { 
+ t.Errorf("ss.minPendingTimestamp() = %v, want %v", got, want) + } + }) + t.Run("minInProgress", func(t *testing.T) { + ss := newState() + ss.pending = elementHeap{ + {timestamp: 3}, + {timestamp: 2}, + {timestamp: 1}, + } + heap.Init(&ss.pending) + ss.inprogress = map[string]elements{ + "a": { + es: []element{ + {timestamp: want}, + {timestamp: mtime.EndOfGlobalWindowTime}, + {timestamp: mtime.MaxTimestamp}, + }, + minTimestamp: want, + }, + "c": { + es: []element{ + {timestamp: mtime.ZeroTimestamp}, + }, + minTimestamp: mtime.ZeroTimestamp, + }, + } + + got := ss.minPendingTimestamp() + if got != want { + t.Errorf("ss.minPendingTimestamp() = %v, want %v", got, want) + } + }) +} + +func TestStageState_UpstreamWatermark(t *testing.T) { + impulse := makeStageState("impulse", nil, nil, []string{"output"}) + _, up := impulse.UpstreamWatermark() + if got, want := up, mtime.MaxTimestamp; got != want { + t.Errorf("impulse.UpstreamWatermark() = %v, want %v", got, want) + } + + dofn := makeStageState("dofn", []string{"input"}, nil, []string{"output"}) + dofn.updateUpstreamWatermark("input", 42) + + _, up = dofn.UpstreamWatermark() + if got, want := up, mtime.Time(42); got != want { + t.Errorf("dofn.UpstreamWatermark() = %v, want %v", got, want) + } + + flatten := makeStageState("flatten", []string{"a", "b", "c"}, nil, []string{"output"}) + flatten.updateUpstreamWatermark("a", 50) + flatten.updateUpstreamWatermark("b", 42) + flatten.updateUpstreamWatermark("c", 101) + _, up = flatten.UpstreamWatermark() + if got, want := up, mtime.Time(42); got != want { + t.Errorf("flatten.UpstreamWatermark() = %v, want %v", got, want) + } +} + +func TestStageState_updateWatermarks(t *testing.T) { + inputCol := "testInput" + outputCol := "testOutput" + newState := func() (*stageState, *stageState, *ElementManager) { + underTest := makeStageState("underTest", []string{inputCol}, nil, []string{outputCol}) + outStage := makeStageState("outStage", []string{outputCol}, nil, nil) + em := &ElementManager{ + consumers: map[string][]string{ + inputCol: {underTest.ID}, + outputCol: {outStage.ID}, + }, + stages: map[string]*stageState{ + outStage.ID: outStage, + underTest.ID: underTest, + }, + } + return underTest, outStage, em + } + + tests := []struct { + name string + initInput, initOutput mtime.Time + upstream, minPending, minStateHold mtime.Time + wantInput, wantOutput, wantDownstream mtime.Time + }{ + { + name: "initialized", + initInput: mtime.MinTimestamp, + initOutput: mtime.MinTimestamp, + upstream: mtime.MinTimestamp, + minPending: mtime.EndOfGlobalWindowTime, + minStateHold: mtime.EndOfGlobalWindowTime, + wantInput: mtime.MinTimestamp, // match default + wantOutput: mtime.MinTimestamp, // match upstream + wantDownstream: mtime.MinTimestamp, // match upstream + }, { + name: "upstream", + initInput: mtime.MinTimestamp, + initOutput: mtime.MinTimestamp, + upstream: mtime.ZeroTimestamp, + minPending: mtime.EndOfGlobalWindowTime, + minStateHold: mtime.EndOfGlobalWindowTime, + wantInput: mtime.ZeroTimestamp, // match upstream + wantOutput: mtime.ZeroTimestamp, // match upstream + wantDownstream: mtime.ZeroTimestamp, // match upstream + }, { + name: "useMinPending", + initInput: mtime.MinTimestamp, + initOutput: mtime.MinTimestamp, + upstream: mtime.ZeroTimestamp, + minPending: -20, + minStateHold: mtime.EndOfGlobalWindowTime, + wantInput: -20, // match minPending + wantOutput: -20, // match minPending + wantDownstream: -20, // match minPending + }, { + name: "useStateHold", + initInput: mtime.MinTimestamp, + 
initOutput: mtime.MinTimestamp, + upstream: mtime.ZeroTimestamp, + minPending: -20, + minStateHold: -30, + wantInput: -20, // match minPending + wantOutput: -30, // match state hold + wantDownstream: -30, // match state hold + }, { + name: "noAdvance", + initInput: 20, + initOutput: 30, + upstream: mtime.MinTimestamp, + wantInput: 20, // match original input + wantOutput: 30, // match original output + wantDownstream: mtime.MinTimestamp, // not propagated + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ss, outStage, em := newState() + ss.input = test.initInput + ss.output = test.initOutput + ss.updateUpstreamWatermark(inputCol, test.upstream) + ss.updateWatermarks(test.minPending, test.minStateHold, em) + if got, want := ss.input, test.wantInput; got != want { + pcol, up := ss.UpstreamWatermark() + t.Errorf("ss.updateWatermarks(%v,%v); ss.input = %v, want %v (upstream %v %v)", test.minPending, test.minStateHold, got, want, pcol, up) + } + if got, want := ss.output, test.wantOutput; got != want { + pcol, up := ss.UpstreamWatermark() + t.Errorf("ss.updateWatermarks(%v,%v); ss.output = %v, want %v (upstream %v %v)", test.minPending, test.minStateHold, got, want, pcol, up) + } + _, up := outStage.UpstreamWatermark() + if got, want := up, test.wantDownstream; got != want { + t.Errorf("outStage.UpstreamWatermark() = %v, want %v", got, want) + } + }) + } + +} + +func TestElementManager(t *testing.T) { + t.Run("impulse", func(t *testing.T) { + em := NewElementManager(Config{}) + em.AddStage("impulse", nil, nil, []string{"output"}) + em.AddStage("dofn", []string{"output"}, nil, nil) + + em.Impulse("impulse") + + if got, want := em.stages["impulse"].OutputWatermark(), mtime.MaxTimestamp; got != want { + t.Fatalf("impulse.OutputWatermark() = %v, want %v", got, want) + } + + var i int + ch := em.Bundles(context.Background(), func() string { + defer func() { i++ }() + return fmt.Sprintf("%v", i) + }) + rb, ok := <-ch + if !ok { + t.Error("Bundles channel unexpectedly closed") + } + if got, want := rb.StageID, "dofn"; got != want { + t.Errorf("stage to execute = %v, want %v", got, want) + } + em.PersistBundle(rb, nil, TentativeData{}, PColInfo{}, nil, nil) + _, ok = <-ch + if ok { + t.Error("Bundles channel expected to be closed") + } + if got, want := i, 1; got != want { + t.Errorf("got %v bundles, want %v", got, want) + } + }) + + info := PColInfo{ + GlobalID: "generic_info", // GlobalID isn't used except for debugging. 
+ WDec: exec.MakeWindowDecoder(coder.NewGlobalWindow()), + WEnc: exec.MakeWindowEncoder(coder.NewGlobalWindow()), + EDec: func(r io.Reader) []byte { + b, err := io.ReadAll(r) + if err != nil { + t.Fatalf("error decoding \"generic_info\" data:%v", err) + } + return b + }, + } + es := elements{ + es: []element{{ + window: window.GlobalWindow{}, + timestamp: mtime.MinTimestamp, + pane: typex.NoFiringPane(), + elmBytes: []byte{3, 65, 66, 67}, // "ABC" + }}, + minTimestamp: mtime.MinTimestamp, + } + + t.Run("dofn", func(t *testing.T) { + em := NewElementManager(Config{}) + em.AddStage("impulse", nil, nil, []string{"input"}) + em.AddStage("dofn1", []string{"input"}, nil, []string{"output"}) + em.AddStage("dofn2", []string{"output"}, nil, nil) + em.Impulse("impulse") + + var i int + ch := em.Bundles(context.Background(), func() string { + defer func() { i++ }() + t.Log("generating bundle", i) + return fmt.Sprintf("%v", i) + }) + rb, ok := <-ch + if !ok { + t.Error("Bundles channel unexpectedly closed") + } + t.Log("received bundle", i) + + td := TentativeData{} + for _, d := range es.ToData(info) { + td.WriteData("output", d) + } + outputCoders := map[string]PColInfo{ + "output": info, + } + + em.PersistBundle(rb, outputCoders, td, info, nil, nil) + rb, ok = <-ch + if !ok { + t.Error("Bundles channel not expected to be closed") + } + // Check the data is what's expected: + data := em.InputForBundle(rb, info) + if got, want := len(data), 1; got != want { + t.Errorf("data len = %v, want %v", got, want) + } + if !cmp.Equal([]byte{127, 223, 59, 100, 90, 28, 172, 9, 0, 0, 0, 1, 15, 3, 65, 66, 67}, data[0]) { + t.Errorf("unexpected data, got %v", data[0]) + } + em.PersistBundle(rb, outputCoders, TentativeData{}, info, nil, nil) + rb, ok = <-ch + if ok { + t.Error("Bundles channel expected to be closed", rb) + } + + if got, want := i, 2; got != want { + t.Errorf("got %v bundles, want %v", got, want) + } + }) + + t.Run("side", func(t *testing.T) { + em := NewElementManager(Config{}) + em.AddStage("impulse", nil, nil, []string{"input"}) + em.AddStage("dofn1", []string{"input"}, nil, []string{"output"}) + em.AddStage("dofn2", []string{"input"}, []string{"output"}, nil) + em.Impulse("impulse") + + var i int + ch := em.Bundles(context.Background(), func() string { + defer func() { i++ }() + t.Log("generating bundle", i) + return fmt.Sprintf("%v", i) + }) + rb, ok := <-ch + if !ok { + t.Error("Bundles channel unexpectedly closed") + } + t.Log("received bundle", i) + + if got, want := rb.StageID, "dofn1"; got != want { + t.Fatalf("stage to execute = %v, want %v", got, want) + } + + td := TentativeData{} + for _, d := range es.ToData(info) { + td.WriteData("output", d) + } + outputCoders := map[string]PColInfo{ + "output": info, + "input": info, + "impulse": info, + } + + em.PersistBundle(rb, outputCoders, td, info, nil, nil) + rb, ok = <-ch + if !ok { + t.Fatal("Bundles channel not expected to be closed") + } + if got, want := rb.StageID, "dofn2"; got != want { + t.Fatalf("stage to execute = %v, want %v", got, want) + } + em.PersistBundle(rb, outputCoders, TentativeData{}, info, nil, nil) + rb, ok = <-ch + if ok { + t.Error("Bundles channel expected to be closed") + } + + if got, want := i, 2; got != want { + t.Errorf("got %v bundles, want %v", got, want) + } + }) + t.Run("residual", func(t *testing.T) { + em := NewElementManager(Config{}) + em.AddStage("impulse", nil, nil, []string{"input"}) + em.AddStage("dofn", []string{"input"}, nil, nil) + em.Impulse("impulse") + + var i int + ch := 
em.Bundles(context.Background(), func() string { + defer func() { i++ }() + t.Log("generating bundle", i) + return fmt.Sprintf("%v", i) + }) + rb, ok := <-ch + if !ok { + t.Error("Bundles channel unexpectedly closed") + } + t.Log("received bundle", i) + + // Add a residual + resid := es.ToData(info) + em.PersistBundle(rb, nil, TentativeData{}, info, resid, nil) + rb, ok = <-ch + if !ok { + t.Error("Bundles channel not expected to be closed") + } + // Check the data is what's expected: + data := em.InputForBundle(rb, info) + if got, want := len(data), 1; got != want { + t.Errorf("data len = %v, want %v", got, want) + } + if !cmp.Equal([]byte{127, 223, 59, 100, 90, 28, 172, 9, 0, 0, 0, 1, 15, 3, 65, 66, 67}, data[0]) { + t.Errorf("unexpected data, got %v", data[0]) + } + em.PersistBundle(rb, nil, TentativeData{}, info, nil, nil) + rb, ok = <-ch + if ok { + t.Error("Bundles channel expected to be closed", rb) + } + + if got, want := i, 2; got != want { + t.Errorf("got %v bundles, want %v", got, want) + } + }) +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/engine/strategy.go b/sdks/go/pkg/beam/runners/prism/internal/engine/strategy.go new file mode 100644 index 000000000000..44e6064958c0 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/engine/strategy.go @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package engine + +import ( + "fmt" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" +) + +type winStrat interface { + EarliestCompletion(typex.Window) mtime.Time +} + +type defaultStrat struct{} + +func (ws defaultStrat) EarliestCompletion(w typex.Window) mtime.Time { + return w.MaxTimestamp() +} + +func (defaultStrat) String() string { + return "default" +} + +type sessionStrat struct { + GapSize time.Duration +} + +func (ws sessionStrat) EarliestCompletion(w typex.Window) mtime.Time { + return w.MaxTimestamp().Add(ws.GapSize) +} + +func (ws sessionStrat) String() string { + return fmt.Sprintf("session[GapSize:%v]", ws.GapSize) +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/engine/strategy_test.go b/sdks/go/pkg/beam/runners/prism/internal/engine/strategy_test.go new file mode 100644 index 000000000000..9d558396f806 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/engine/strategy_test.go @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package engine + +import ( + "testing" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" +) + +func TestEarliestCompletion(t *testing.T) { + tests := []struct { + strat winStrat + input typex.Window + want mtime.Time + }{ + {defaultStrat{}, window.GlobalWindow{}, mtime.EndOfGlobalWindowTime}, + {defaultStrat{}, window.IntervalWindow{Start: 0, End: 4}, 3}, + {defaultStrat{}, window.IntervalWindow{Start: mtime.MinTimestamp, End: mtime.MaxTimestamp}, mtime.MaxTimestamp - 1}, + {sessionStrat{}, window.IntervalWindow{Start: 0, End: 4}, 3}, + {sessionStrat{GapSize: 3 * time.Millisecond}, window.IntervalWindow{Start: 0, End: 4}, 6}, + } + + for _, test := range tests { + if got, want := test.strat.EarliestCompletion(test.input), test.want; got != want { + t.Errorf("%v.EarliestCompletion(%v)) = %v, want %v", test.strat, test.input, got, want) + } + } +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go b/sdks/go/pkg/beam/runners/prism/internal/execute.go new file mode 100644 index 000000000000..2329a43d2144 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go @@ -0,0 +1,304 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "context" + "fmt" + "io" + "sort" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" + "golang.org/x/exp/maps" + "golang.org/x/exp/slog" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/protobuf/proto" +) + +// RunPipeline starts the main thread fo executing this job. +// It's analoguous to the manager side process for a distributed pipeline. 
+// It will begin "workers" +func RunPipeline(j *jobservices.Job) { + j.SendMsg("starting " + j.String()) + j.Start() + + // In a "proper" runner, we'd iterate through all the + // environments, and start up docker containers, but + // here, we only want and need the go one, operating + // in loopback mode. + env := "go" + wk := worker.New(env) // Cheating by having the worker id match the environment id. + go wk.Serve() + + // When this function exits, we + defer func() { + j.CancelFn() + }() + go runEnvironment(j.RootCtx, j, env, wk) + + j.SendMsg("running " + j.String()) + j.Running() + + executePipeline(j.RootCtx, wk, j) + j.SendMsg("pipeline completed " + j.String()) + + // Stop the worker. + wk.Stop() + + j.SendMsg("terminating " + j.String()) + j.Done() +} + +// TODO move environment handling to the worker package. + +func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) { + // TODO fix broken abstraction. + // We're starting a worker pool here, because that's the loopback environment. + // It's sort of a mess, largely because of loopback, which has + // a different flow from a provisioned docker container. + e := j.Pipeline.GetComponents().GetEnvironments()[env] + switch e.GetUrn() { + case urns.EnvExternal: + ep := &pipepb.ExternalPayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil { + slog.Error("unmarshing environment payload", err, slog.String("envID", wk.ID)) + } + externalEnvironment(ctx, ep, wk) + slog.Info("environment stopped", slog.String("envID", wk.String()), slog.String("job", j.String())) + default: + panic(fmt.Sprintf("environment %v with urn %v unimplemented", env, e.GetUrn())) + } +} + +func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) { + conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err)) + } + defer conn.Close() + pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn) + + endpoint := &pipepb.ApiServiceDescriptor{ + Url: wk.Endpoint(), + } + + pool.StartWorker(ctx, &fnpb.StartWorkerRequest{ + WorkerId: wk.ID, + ControlEndpoint: endpoint, + LoggingEndpoint: endpoint, + ArtifactEndpoint: endpoint, + ProvisionEndpoint: endpoint, + Params: nil, + }) + + // Job processing happens here, but orchestrated by other goroutines + // This goroutine blocks until the context is cancelled, signalling + // that the pool runner should stop the worker. + <-ctx.Done() + + // Previous context cancelled so we need a new one + // for this request. + pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{ + WorkerId: wk.ID, + }) +} + +type transformExecuter interface { + ExecuteUrns() []string + ExecuteWith(t *pipepb.PTransform) string + ExecuteTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components, watermark mtime.Time, data [][]byte) *worker.B +} + +type processor struct { + transformExecuters map[string]transformExecuter +} + +func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) { + pipeline := j.Pipeline + comps := proto.Clone(pipeline.GetComponents()).(*pipepb.Components) + + // TODO, configure the preprocessor from pipeline options. + // Maybe change these returns to a single struct for convenience and further + // annotation? 
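// For illustration only: a hypothetical handler satisfying the transformExecuter
// interface defined above. The type, its URN, and the passthrough behavior are
// invented for this sketch; the real handlers (Combine, ParDo, Runner) are
// configured just below, and only packages already imported in this file are
// used. Returning "" from ExecuteWith is how a handler claims a transform for
// runner-side execution, with ExecuteTransform then producing the bundle
// results directly, mirroring the runner handler later in this patch.
type passthrough struct{}

// ExecuteUrns lists the composite URNs this handler wants to execute.
func (passthrough) ExecuteUrns() []string {
	return []string{"beam:transform:example:passthrough:v1"} // invented URN
}

// ExecuteWith returns the environment that should run the transform;
// "" means the runner executes it itself.
func (passthrough) ExecuteWith(t *pipepb.PTransform) string { return "" }

// ExecuteTransform copies the input bytes to the transform's only output,
// using the same worker.B shape the runner handler builds.
func (passthrough) ExecuteTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components, watermark mtime.Time, data [][]byte) *worker.B {
	var localID, onlyOut string
	for local, global := range t.GetOutputs() {
		localID, onlyOut = local, global
	}
	output := engine.TentativeData{}
	for _, d := range data {
		output.WriteData(onlyOut, d)
	}
	dataID := tid + "_" + localID
	return &worker.B{
		InputTransformID:  dataID,
		SinkToPCollection: map[string]string{dataID: onlyOut},
		OutputData:        output,
	}
}

var _ transformExecuter = passthrough{}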
+ + handlers := []any{ + Combine(CombineCharacteristic{EnableLifting: true}), + ParDo(ParDoCharacteristic{DisableSDF: true}), + Runner(RunnerCharacteristic{ + SDKFlatten: false, + }), + } + + proc := processor{ + transformExecuters: map[string]transformExecuter{}, + } + + var preppers []transformPreparer + for _, h := range handlers { + if th, ok := h.(transformPreparer); ok { + preppers = append(preppers, th) + } + if th, ok := h.(transformExecuter); ok { + for _, urn := range th.ExecuteUrns() { + proc.transformExecuters[urn] = th + } + } + } + + prepro := newPreprocessor(preppers) + + topo := prepro.preProcessGraph(comps) + ts := comps.GetTransforms() + + em := engine.NewElementManager(engine.Config{}) + + // TODO move this loop and code into the preprocessor instead. + stages := map[string]*stage{} + var impulses []string + for i, stage := range topo { + if len(stage.transforms) != 1 { + panic(fmt.Sprintf("unsupported stage[%d]: contains multiple transforms: %v; TODO: implement fusion", i, stage.transforms)) + } + tid := stage.transforms[0] + t := ts[tid] + urn := t.GetSpec().GetUrn() + stage.exe = proc.transformExecuters[urn] + + // Stopgap until everythinng's moved to handlers. + stage.envID = t.GetEnvironmentId() + if stage.exe != nil { + stage.envID = stage.exe.ExecuteWith(t) + } + stage.ID = wk.NextStage() + + switch stage.envID { + case "": // Runner Transforms + + var onlyOut string + for _, out := range t.GetOutputs() { + onlyOut = out + } + stage.OutputsToCoders = map[string]engine.PColInfo{} + coders := map[string]*pipepb.Coder{} + makeWindowedValueCoder(onlyOut, comps, coders) + + col := comps.GetPcollections()[onlyOut] + ed := collectionPullDecoder(col.GetCoderId(), coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + + stage.OutputsToCoders[onlyOut] = engine.PColInfo{ + GlobalID: onlyOut, + WDec: wDec, + WEnc: wEnc, + EDec: ed, + } + + // There's either 0, 1 or many inputs, but they should be all the same + // so break after the first one. + for _, global := range t.GetInputs() { + col := comps.GetPcollections()[global] + ed := collectionPullDecoder(col.GetCoderId(), coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + stage.inputInfo = engine.PColInfo{ + GlobalID: global, + WDec: wDec, + WEnc: wEnc, + EDec: ed, + } + break + } + + switch urn { + case urns.TransformGBK: + em.AddStage(stage.ID, []string{getOnlyValue(t.GetInputs())}, nil, []string{getOnlyValue(t.GetOutputs())}) + for _, global := range t.GetInputs() { + col := comps.GetPcollections()[global] + ed := collectionPullDecoder(col.GetCoderId(), coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + stage.inputInfo = engine.PColInfo{ + GlobalID: global, + WDec: wDec, + WEnc: wEnc, + EDec: ed, + } + } + em.StageAggregates(stage.ID) + case urns.TransformImpulse: + impulses = append(impulses, stage.ID) + em.AddStage(stage.ID, nil, nil, []string{getOnlyValue(t.GetOutputs())}) + case urns.TransformFlatten: + inputs := maps.Values(t.GetInputs()) + sort.Strings(inputs) + em.AddStage(stage.ID, inputs, nil, []string{getOnlyValue(t.GetOutputs())}) + } + stages[stage.ID] = stage + wk.Descriptors[stage.ID] = stage.desc + case wk.ID: + // Great! this is for this environment. // Broken abstraction. 
+ buildStage(stage, tid, t, comps, wk) + stages[stage.ID] = stage + slog.Debug("pipelineBuild", slog.Group("stage", slog.String("ID", stage.ID), slog.String("transformName", t.GetUniqueName()))) + outputs := maps.Keys(stage.OutputsToCoders) + sort.Strings(outputs) + em.AddStage(stage.ID, []string{stage.mainInputPCol}, stage.sides, outputs) + default: + err := fmt.Errorf("unknown environment[%v]", t.GetEnvironmentId()) + slog.Error("Execute", err) + panic(err) + } + } + + // Prime the initial impulses, since we now know what consumes them. + for _, id := range impulses { + em.Impulse(id) + } + + // Execute stages here + for rb := range em.Bundles(ctx, wk.NextInst) { + s := stages[rb.StageID] + s.Execute(j, wk, comps, em, rb) + } + slog.Info("pipeline done!", slog.String("job", j.String())) +} + +func collectionPullDecoder(coldCId string, coders map[string]*pipepb.Coder, comps *pipepb.Components) func(io.Reader) []byte { + cID := lpUnknownCoders(coldCId, coders, comps.GetCoders()) + return pullDecoder(coders[cID], coders) +} + +func getWindowValueCoders(comps *pipepb.Components, col *pipepb.PCollection, coders map[string]*pipepb.Coder) (exec.WindowDecoder, exec.WindowEncoder) { + ws := comps.GetWindowingStrategies()[col.GetWindowingStrategyId()] + wcID := lpUnknownCoders(ws.GetWindowCoderId(), coders, comps.GetCoders()) + return makeWindowCoders(coders[wcID]) +} + +func getOnlyValue[K comparable, V any](in map[K]V) V { + if len(in) != 1 { + panic(fmt.Sprintf("expected single value map, had %v", len(in))) + } + for _, v := range in { + return v + } + panic("unreachable") +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute_test.go b/sdks/go/pkg/beam/runners/prism/internal/execute_test.go new file mode 100644 index 000000000000..de7247486bbc --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/execute_test.go @@ -0,0 +1,417 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
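// For illustration only: getOnlyValue, defined just above, is how the executor
// pulls the single PCollection out of maps like a PTransform's Inputs or
// Outputs. A minimal standalone use, with invented map contents:
package internal

func exampleGetOnlyValue() string {
	outputs := map[string]string{"i0": "n1"} // a transform with exactly one output
	return getOnlyValue(outputs)             // returns "n1"; panics if len(outputs) != 1
}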
+ +package internal + +import ( + "context" + "os" + "testing" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" + "github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" + "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/filter" + "github.com/apache/beam/sdks/v2/go/test/integration/primitives" +) + +func initRunner(t *testing.T) { + t.Helper() + if *jobopts.Endpoint == "" { + s := jobservices.NewServer(0, RunPipeline) + *jobopts.Endpoint = s.Endpoint() + go s.Serve() + t.Cleanup(func() { + *jobopts.Endpoint = "" + s.Stop() + }) + } + if !jobopts.IsLoopback() { + *jobopts.EnvironmentType = "loopback" + } + // Since we force loopback, avoid cross-compilation. + f, err := os.CreateTemp("", "dummy") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { os.Remove(f.Name()) }) + *jobopts.WorkerBinary = f.Name() +} + +func execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) { + return universal.Execute(ctx, p) +} + +func executeWithT(ctx context.Context, t *testing.T, p *beam.Pipeline) (beam.PipelineResult, error) { + t.Log("startingTest - ", t.Name()) + return execute(ctx, p) +} + +func init() { + // Not actually being used, but explicitly registering + // will avoid accidentally using a different runner for + // the tests if I change things later. + beam.RegisterRunner("testlocal", execute) +} + +func TestRunner_Pipelines(t *testing.T) { + initRunner(t) + + tests := []struct { + name string + pipeline func(s beam.Scope) + metrics func(t *testing.T, pr beam.PipelineResult) + }{ + { + name: "simple", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col := beam.ParDo(s, dofn1, imp) + beam.ParDo(s, &int64Check{ + Name: "simple", + Want: []int{1, 2, 3}, + }, col) + }, + }, { + name: "sequence", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + beam.Seq(s, imp, dofn1, dofn2, dofn2, dofn2, &int64Check{Name: "sequence", Want: []int{4, 5, 6}}) + }, + }, { + name: "gbk", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col := beam.ParDo(s, dofnKV, imp) + gbk := beam.GroupByKey(s, col) + beam.Seq(s, gbk, dofnGBK, &int64Check{Name: "gbk", Want: []int{9, 12}}) + }, + }, { + name: "gbk2", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col := beam.ParDo(s, dofnKV2, imp) + gbk := beam.GroupByKey(s, col) + beam.Seq(s, gbk, dofnGBK2, &stringCheck{Name: "gbk2", Want: []string{"aaa", "bbb"}}) + }, + }, { + name: "gbk3", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col := beam.ParDo(s, dofnKV3, imp) + gbk := beam.GroupByKey(s, col) + beam.Seq(s, gbk, dofnGBK3, &stringCheck{Name: "gbk3", Want: []string{"{a 1}: {a 1}"}}) + }, + }, { + name: "sink_nooutputs", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + beam.ParDo0(s, dofnSink, imp) + }, + metrics: func(t *testing.T, pr beam.PipelineResult) { + qr := pr.Metrics().Query(func(sr metrics.SingleResult) bool { + return sr.Name() == "sunk" + }) + if got, want := qr.Counters()[0].Committed, int64(73); got != want { + t.Errorf("pr.Metrics.Query(Name = \"sunk\")).Committed = %v, want %v", got, want) + } + }, + }, { + name: "fork_impulse", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofn1, imp) + col2 := 
beam.ParDo(s, dofn1, imp) + beam.ParDo(s, &int64Check{ + Name: "fork check1", + Want: []int{1, 2, 3}, + }, col1) + beam.ParDo(s, &int64Check{ + Name: "fork check2", + Want: []int{1, 2, 3}, + }, col2) + }, + }, { + name: "fork_postDoFn", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col := beam.ParDo(s, dofn1, imp) + beam.ParDo(s, &int64Check{ + Name: "fork check1", + Want: []int{1, 2, 3}, + }, col) + beam.ParDo(s, &int64Check{ + Name: "fork check2", + Want: []int{1, 2, 3}, + }, col) + }, + }, { + name: "fork_multipleOutputs1", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1, col2, col3, col4, col5 := beam.ParDo5(s, dofn1x5, imp) + beam.ParDo(s, &int64Check{ + Name: "col1", + Want: []int{1, 6}, + }, col1) + beam.ParDo(s, &int64Check{ + Name: "col2", + Want: []int{2, 7}, + }, col2) + beam.ParDo(s, &int64Check{ + Name: "col3", + Want: []int{3, 8}, + }, col3) + beam.ParDo(s, &int64Check{ + Name: "col4", + Want: []int{4, 9}, + }, col4) + beam.ParDo(s, &int64Check{ + Name: "col5", + Want: []int{5, 10}, + }, col5) + }, + }, { + name: "fork_multipleOutputs2", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1, col2, col3, col4, col5 := beam.ParDo5(s, dofn1x5, imp) + beam.ParDo(s, &int64Check{ + Name: "col1", + Want: []int{1, 6}, + }, col1) + beam.ParDo(s, &int64Check{ + Name: "col2", + Want: []int{2, 7}, + }, col2) + beam.ParDo(s, &int64Check{ + Name: "col3", + Want: []int{3, 8}, + }, col3) + beam.ParDo(s, &int64Check{ + Name: "col4", + Want: []int{4, 9}, + }, col4) + beam.ParDo(s, &int64Check{ + Name: "col5", + Want: []int{5, 10}, + }, col5) + }, + }, { + name: "flatten", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofn1, imp) + col2 := beam.ParDo(s, dofn1, imp) + flat := beam.Flatten(s, col1, col2) + beam.ParDo(s, &int64Check{ + Name: "flatten check", + Want: []int{1, 1, 2, 2, 3, 3}, + }, flat) + }, + }, { + name: "sideinput_iterable_oneimpulse", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofn1, imp) + sum := beam.ParDo(s, dofn2x1, imp, beam.SideInput{Input: col1}) + beam.ParDo(s, &int64Check{ + Name: "iter sideinput check", + Want: []int{6}, + }, sum) + }, + }, { + name: "sideinput_iterable_twoimpulse", + pipeline: func(s beam.Scope) { + imp1 := beam.Impulse(s) + col1 := beam.ParDo(s, dofn1, imp1) + imp2 := beam.Impulse(s) + sum := beam.ParDo(s, dofn2x1, imp2, beam.SideInput{Input: col1}) + beam.ParDo(s, &int64Check{ + Name: "iter sideinput check", + Want: []int{6}, + }, sum) + }, + }, { + name: "sideinput_iterableKV", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofnKV, imp) + keys, sum := beam.ParDo2(s, dofn2x2KV, imp, beam.SideInput{Input: col1}) + beam.ParDo(s, &stringCheck{ + Name: "iterKV sideinput check K", + Want: []string{"a", "a", "a", "b", "b", "b"}, + }, keys) + beam.ParDo(s, &int64Check{ + Name: "iterKV sideinput check V", + Want: []int{21}, + }, sum) + }, + }, { + name: "sideinput_iterableKV", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofnKV, imp) + keys, sum := beam.ParDo2(s, dofn2x2KV, imp, beam.SideInput{Input: col1}) + beam.ParDo(s, &stringCheck{ + Name: "iterKV sideinput check K", + Want: []string{"a", "a", "a", "b", "b", "b"}, + }, keys) + beam.ParDo(s, &int64Check{ + Name: "iterKV sideinput check V", + Want: []int{21}, + }, sum) + }, + }, { + name: "sideinput_multimap", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofnKV, imp) + keys := 
filter.Distinct(s, beam.DropValue(s, col1)) + ks, sum := beam.ParDo2(s, dofnMultiMap, keys, beam.SideInput{Input: col1}) + beam.ParDo(s, &stringCheck{ + Name: "multiMap sideinput check K", + Want: []string{"a", "b"}, + }, ks) + beam.ParDo(s, &int64Check{ + Name: "multiMap sideinput check V", + Want: []int{9, 12}, + }, sum) + }, + }, { + // Ensures topological sort is correct. + name: "sideinput_2iterable", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col0 := beam.ParDo(s, dofn1, imp) + col1 := beam.ParDo(s, dofn1, imp) + col2 := beam.ParDo(s, dofn2, col1) + sum := beam.ParDo(s, dofn3x1, col0, beam.SideInput{Input: col1}, beam.SideInput{Input: col2}) + beam.ParDo(s, &int64Check{ + Name: "iter sideinput check", + Want: []int{16, 17, 18}, + }, sum) + }, + }, { + name: "combine_perkey", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + in := beam.ParDo(s, dofn1kv, imp) + keyedsum := beam.CombinePerKey(s, combineIntSum, in) + sum := beam.DropKey(s, keyedsum) + beam.ParDo(s, &int64Check{ + Name: "combine", + Want: []int{6}, + }, sum) + }, + }, { + name: "combine_global", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + in := beam.ParDo(s, dofn1, imp) + sum := beam.Combine(s, combineIntSum, in) + beam.ParDo(s, &int64Check{ + Name: "combine", + Want: []int{6}, + }, sum) + }, + }, { + name: "sdf_single_split", + pipeline: func(s beam.Scope) { + configs := beam.Create(s, SourceConfig{NumElements: 10, InitialSplits: 1}) + in := beam.ParDo(s, &intRangeFn{}, configs) + beam.ParDo(s, &int64Check{ + Name: "sdf_single", + Want: []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, in) + }, + }, { + name: "WindowedSideInputs", + pipeline: primitives.ValidateWindowedSideInputs, + }, { + name: "WindowSums_GBK", + pipeline: primitives.WindowSums_GBK, + }, { + name: "WindowSums_Lifted", + pipeline: primitives.WindowSums_Lifted, + }, { + name: "ProcessContinuations_globalCombine", + pipeline: func(s beam.Scope) { + out := beam.ParDo(s, &selfCheckpointingDoFn{}, beam.Impulse(s)) + passert.Count(s, out, "num ints", 10) + }, + }, { + name: "flatten_to_sideInput", + pipeline: func(s beam.Scope) { + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofn1, imp) + col2 := beam.ParDo(s, dofn1, imp) + flat := beam.Flatten(s, col1, col2) + beam.ParDo(s, &int64Check{ + Name: "flatten check", + Want: []int{1, 1, 2, 2, 3, 3}, + }, flat) + passert.NonEmpty(s, flat) + }, + }, + } + // TODO: Explicit DoFn Failure case. + // TODO: Session windows, where some are not merged. 
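// For illustration only: a single-case analogue of the table-driven loop that
// follows, showing the full flow of the harness above against one pipeline.
// The pipeline shape matches the "simple" case; dofn1 and int64Check are the
// shared test DoFns used throughout this file, the function name is invented,
// and only packages already imported here are used.
func runSimplePipeline(t *testing.T) {
	initRunner(t) // Start the in-process prism job server and force loopback.
	p, s := beam.NewPipelineWithRoot()
	imp := beam.Impulse(s)
	col := beam.ParDo(s, dofn1, imp)
	beam.ParDo(s, &int64Check{Name: "example", Want: []int{1, 2, 3}}, col)
	if _, err := executeWithT(context.Background(), t, p); err != nil {
		t.Fatal(err)
	}
}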
+ + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + p, s := beam.NewPipelineWithRoot() + test.pipeline(s) + pr, err := executeWithT(context.Background(), t, p) + if err != nil { + t.Fatal(err) + } + if test.metrics != nil { + test.metrics(t, pr) + } + }) + } +} + +func TestRunner_Metrics(t *testing.T) { + initRunner(t) + t.Run("counter", func(t *testing.T) { + p, s := beam.NewPipelineWithRoot() + imp := beam.Impulse(s) + beam.ParDo(s, dofn1Counter, imp) + pr, err := executeWithT(context.Background(), t, p) + if err != nil { + t.Fatal(err) + } + qr := pr.Metrics().Query(func(sr metrics.SingleResult) bool { + return sr.Name() == "count" + }) + if got, want := qr.Counters()[0].Committed, int64(1); got != want { + t.Errorf("pr.Metrics.Query(Name = \"count\")).Committed = %v, want %v", got, want) + } + }) +} + +// TODO: PCollection metrics tests, in particular for element counts, in multi transform pipelines +// There's a doubling bug since we re-use the same pcollection IDs for the source & sink, and +// don't do any re-writing. + +func TestMain(m *testing.M) { + ptest.MainWithDefault(m, "testlocal") +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/handlecombine.go b/sdks/go/pkg/beam/runners/prism/internal/handlecombine.go new file mode 100644 index 000000000000..ff9bd1e1c88a --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/handlecombine.go @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "fmt" + "reflect" + + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "google.golang.org/protobuf/proto" +) + +// This file retains the logic for the combine handler + +// CombineCharacteristic holds the configuration for Combines. +type CombineCharacteristic struct { + EnableLifting bool // Sets whether a combine composite does combiner lifting or not. +} + +// TODO figure out the factory we'd like. + +func Combine(config any) *combine { + return &combine{config: config.(CombineCharacteristic)} +} + +// combine represents an instance of the combine handler. +type combine struct { + config CombineCharacteristic +} + +// ConfigURN returns the name for combine in the configuration file. +func (*combine) ConfigURN() string { + return "combine" +} + +func (*combine) ConfigCharacteristic() reflect.Type { + return reflect.TypeOf((*CombineCharacteristic)(nil)).Elem() +} + +var _ transformPreparer = (*combine)(nil) + +func (*combine) PrepareUrns() []string { + return []string{urns.TransformCombinePerKey} +} + +// PrepareTransform returns lifted combines and removes the leaves if enabled. Otherwise returns nothing. 
+func (h *combine) PrepareTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components) (*pipepb.Components, []string) { + // If we aren't lifting, the "default impl" for combines should be sufficient. + if !h.config.EnableLifting { + return nil, nil + } + + // To lift a combine, the spec should contain a CombinePayload. + // That contains the actual FunctionSpec for the DoFn, and the + // id for the accumulator coder. + // We can synthetically produce/determine the remaining coders for + // the Input and Output types from the existing PCollections. + // + // This means we also need to synthesize pcollections with the accumulator coder too. + + // What we have: + // Input PCol: KV -- INPUT + // -> GBK := KV> -- GROUPED_I + // -> Combine := KV -- OUTPUT + // + // What we want: + // Input PCol: KV -- INPUT + // -> PreCombine := KV -- LIFTED + // -> GBK -> KV> -- GROUPED_A + // -> MergeAccumulators := KV -- MERGED_A + // -> ExtractOutput -> KV -- OUTPUT + // + // First we need to produce new coders for Iter, KV>, and KV. + // The A coder ID is in the combine payload. + // + // Then we can produce the PCollections. + // We can reuse the INPUT and OUTPUT PCollections. + // We need LIFTED to have KV kv_k_a + // We need GROUPED_A to have KV> kv_k_iter_a + // We need MERGED_A to have KV kv_k_a + // + // GROUPED_I ends up unused. + // + // The PCollections inherit the properties of the Input PCollection + // such as Boundedness, and Windowing Strategy. + // + // With these, we can produce the PTransforms with the appropriate URNs for the + // different parts of the composite, and return the new components. + + cmbPayload := t.GetSpec().GetPayload() + cmb := &pipepb.CombinePayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(cmbPayload, cmb); err != nil { + panic(fmt.Sprintf("unable to decode ParDoPayload for transform[%v]", t.GetUniqueName())) + } + + // First lets get the key coder ID. + var pcolInID string + // There's only one input. + for _, pcol := range t.GetInputs() { + pcolInID = pcol + } + inputPCol := comps.GetPcollections()[pcolInID] + kvkiID := inputPCol.GetCoderId() + kID := comps.GetCoders()[kvkiID].GetComponentCoderIds()[0] + + // Now we can start synthesis! + // Coder IDs + aID := cmb.AccumulatorCoderId + + ckvprefix := "c" + tid + "_kv_" + + iterACID := "c" + tid + "_iter_" + aID + kvkaCID := ckvprefix + kID + "_" + aID + kvkIterACID := ckvprefix + kID + "_iter" + aID + + // PCollection IDs + nprefix := "n" + tid + "_" + liftedNID := nprefix + "lifted" + groupedNID := nprefix + "grouped" + mergedNID := nprefix + "merged" + + // Now we need the output collection ID + var pcolOutID string + // There's only one input. 
+ for _, pcol := range t.GetOutputs() { + pcolOutID = pcol + } + + // Transform IDs + eprefix := "e" + tid + "_" + liftEID := eprefix + "lift" + gbkEID := eprefix + "gbk" + mergeEID := eprefix + "merge" + extractEID := eprefix + "extract" + + coder := func(urn string, componentIDs ...string) *pipepb.Coder { + return &pipepb.Coder{ + Spec: &pipepb.FunctionSpec{ + Urn: urn, + }, + ComponentCoderIds: componentIDs, + } + } + + pcol := func(name, coderID string) *pipepb.PCollection { + return &pipepb.PCollection{ + UniqueName: name, + CoderId: coderID, + IsBounded: inputPCol.GetIsBounded(), + WindowingStrategyId: inputPCol.GetWindowingStrategyId(), + } + } + + tform := func(name, urn, in, out, env string) *pipepb.PTransform { + return &pipepb.PTransform{ + UniqueName: name, + Spec: &pipepb.FunctionSpec{ + Urn: urn, + Payload: cmbPayload, + }, + Inputs: map[string]string{ + "i0": in, + }, + Outputs: map[string]string{ + "i0": out, + }, + EnvironmentId: env, + } + } + + newComps := &pipepb.Components{ + Coders: map[string]*pipepb.Coder{ + iterACID: coder(urns.CoderIterable, aID), + kvkaCID: coder(urns.CoderKV, kID, aID), + kvkIterACID: coder(urns.CoderKV, kID, iterACID), + }, + Pcollections: map[string]*pipepb.PCollection{ + liftedNID: pcol(liftedNID, kvkaCID), + groupedNID: pcol(groupedNID, kvkIterACID), + mergedNID: pcol(mergedNID, kvkaCID), + }, + Transforms: map[string]*pipepb.PTransform{ + liftEID: tform(liftEID, urns.TransformPreCombine, pcolInID, liftedNID, t.GetEnvironmentId()), + gbkEID: tform(gbkEID, urns.TransformGBK, liftedNID, groupedNID, ""), + mergeEID: tform(mergeEID, urns.TransformMerge, groupedNID, mergedNID, t.GetEnvironmentId()), + extractEID: tform(mergeEID, urns.TransformExtract, mergedNID, pcolOutID, t.GetEnvironmentId()), + }, + } + + // Now we return everything! + // TODO recurse through sub transforms to remove? + // We don't need to remove the composite, since we don't add it in + // when we return the new transforms, so it's not in the topology. + return newComps, t.GetSubtransforms() +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/handlepardo.go b/sdks/go/pkg/beam/runners/prism/internal/handlepardo.go new file mode 100644 index 000000000000..2ac5ca5bbf59 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/handlepardo.go @@ -0,0 +1,244 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "fmt" + "reflect" + + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "golang.org/x/exp/maps" + "google.golang.org/protobuf/proto" +) + +// This file retains the logic for the pardo handler + +// ParDoCharacteristic holds the configuration for ParDos. 
+type ParDoCharacteristic struct { + DisableSDF bool // Sets whether a pardo supports SDFs or not. +} + +func ParDo(config any) *pardo { + return &pardo{config: config.(ParDoCharacteristic)} +} + +// pardo represents an instance of the pardo handler. +type pardo struct { + config ParDoCharacteristic +} + +// ConfigURN returns the name for combine in the configuration file. +func (*pardo) ConfigURN() string { + return "pardo" +} + +func (*pardo) ConfigCharacteristic() reflect.Type { + return reflect.TypeOf((*ParDoCharacteristic)(nil)).Elem() +} + +var _ transformPreparer = (*pardo)(nil) + +func (*pardo) PrepareUrns() []string { + return []string{urns.TransformParDo} +} + +// PrepareTransform handles special processing with respect to ParDos, since their handling is dependant on supported features +// and requirements. +func (h *pardo) PrepareTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components) (*pipepb.Components, []string) { + + // ParDos are a pain in the butt. + // Combines, by comparison, are dramatically simpler. + // This is because for ParDos, how they are handled, and what kinds of transforms are in + // and around the ParDo, the actual shape of the graph will change. + // At their simplest, it's something a DoFn will handle on their own. + // At their most complex, they require intimate interaction with the subgraph + // bundling process, the data layer, state layers, and control layers. + // But unlike combines, which have a clear urn for composite + special payload, + // ParDos have the standard URN for composites with the standard payload. + // So always, we need to first unmarshal the payload. + + pardoPayload := t.GetSpec().GetPayload() + pdo := &pipepb.ParDoPayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(pardoPayload, pdo); err != nil { + panic(fmt.Sprintf("unable to decode ParDoPayload for transform[%v]", t.GetUniqueName())) + } + + // Lets check for and remove anything that makes things less simple. + if pdo.OnWindowExpirationTimerFamilySpec == "" && + !pdo.RequestsFinalization && + !pdo.RequiresStableInput && + !pdo.RequiresTimeSortedInput && + len(pdo.StateSpecs) == 0 && + len(pdo.TimerFamilySpecs) == 0 && + pdo.RestrictionCoderId == "" { + // Which inputs are Side inputs don't change the graph further, + // so they're not included here. Any nearly any ParDo can have them. + + // At their simplest, we don't need to do anything special at pre-processing time, and simply pass through as normal. + return &pipepb.Components{ + Transforms: map[string]*pipepb.PTransform{ + tid: t, + }, + }, nil + } + + // Side inputs add to topology and make fusion harder to deal with + // (side input producers can't be in the same stage as their consumers) + // But we don't have fusion yet, so no worries. + + // State, Timers, Stable Input, Time Sorted Input, and some parts of SDF + // Are easier to deal including a fusion break. But We can do that with a + // runner specific transform for stable input, and another for timesorted + // input. + + // SplittableDoFns have 3 required phases and a 4th optional phase. + // + // PAIR_WITH_RESTRICTION which pairs elements with their restrictions + // Input: element; := INPUT + // Output: KV(element, restriction) := PWR + // + // SPLIT_AND_SIZE_RESTRICTIONS splits the pairs into sub element ranges + // and a relative size for each, in a float64 format. 
+ // Input: KV(element, restriction) := PWR + // Output: KV(KV(element, restriction), float64) := SPLITnSIZED + // + // PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS actually processes the + // elements. This is also where splits need to be handled. + // In particular, primary and residual splits have the same format as the input. + // Input: KV(KV(element, restriction), size) := SPLITnSIZED + // Output: DoFn's output. := OUTPUT + // + // TRUNCATE_SIZED_RESTRICTION is how the runner has an SDK turn an + // unbounded transform into a bound one. Not needed until the pipeline + // is told to drain. + // Input: KV(KV(element, restriction), float64) := synthetic split results from above + // Output: KV(KV(element, restriction), float64). := synthetic, truncated results sent as Split n Sized + // + // So with that, we can figure out the coders we need. + // + // cE - Element Coder (same as input coder) + // cR - Restriction Coder + // cS - Size Coder (float64) + // ckvER - KV + // ckvERS - KV, Size> + // + // There could be a few output coders, but the outputs can be copied from + // the original transform directly. + + // First lets get the parallel input coder ID. + var pcolInID, inputLocalID string + for localID, globalID := range t.GetInputs() { + // The parallel input is the one that isn't a side input. + if _, ok := pdo.SideInputs[localID]; !ok { + inputLocalID = localID + pcolInID = globalID + break + } + } + inputPCol := comps.GetPcollections()[pcolInID] + cEID := inputPCol.GetCoderId() + cRID := pdo.RestrictionCoderId + cSID := "c" + tid + "size" + ckvERID := "c" + tid + "kv_ele_rest" + ckvERSID := ckvERID + "_size" + + coder := func(urn string, componentIDs ...string) *pipepb.Coder { + return &pipepb.Coder{ + Spec: &pipepb.FunctionSpec{ + Urn: urn, + }, + ComponentCoderIds: componentIDs, + } + } + + coders := map[string]*pipepb.Coder{ + ckvERID: coder(urns.CoderKV, cEID, cRID), + cSID: coder(urns.CoderDouble), + ckvERSID: coder(urns.CoderKV, ckvERID, cSID), + } + + // PCollections only have two new ones. 
+ // INPUT -> same as ordinary DoFn + // PWR, uses ckvER + // SPLITnSIZED, uses ckvERS + // OUTPUT -> same as ordinary outputs + + nPWRID := "n" + tid + "_pwr" + nSPLITnSIZEDID := "n" + tid + "_splitnsized" + + pcol := func(name, coderID string) *pipepb.PCollection { + return &pipepb.PCollection{ + UniqueName: name, + CoderId: coderID, + IsBounded: inputPCol.GetIsBounded(), + WindowingStrategyId: inputPCol.GetWindowingStrategyId(), + } + } + + pcols := map[string]*pipepb.PCollection{ + nPWRID: pcol(nPWRID, ckvERID), + nSPLITnSIZEDID: pcol(nSPLITnSIZEDID, ckvERSID), + } + + // PTransforms have 3 new ones, with process sized elements and restrictions + // taking the brunt of the complexity, consuming the inputs + + ePWRID := "e" + tid + "_pwr" + eSPLITnSIZEDID := "e" + tid + "_splitnsize" + eProcessID := "e" + tid + "_processandsplit" + + tform := func(name, urn, in, out string) *pipepb.PTransform { + return &pipepb.PTransform{ + UniqueName: name, + Spec: &pipepb.FunctionSpec{ + Urn: urn, + Payload: pardoPayload, + }, + Inputs: map[string]string{ + inputLocalID: in, + }, + Outputs: map[string]string{ + "i0": out, + }, + EnvironmentId: t.GetEnvironmentId(), + } + } + + newInputs := maps.Clone(t.GetInputs()) + newInputs[inputLocalID] = nSPLITnSIZEDID + + tforms := map[string]*pipepb.PTransform{ + ePWRID: tform(ePWRID, urns.TransformPairWithRestriction, pcolInID, nPWRID), + eSPLITnSIZEDID: tform(eSPLITnSIZEDID, urns.TransformSplitAndSize, nPWRID, nSPLITnSIZEDID), + eProcessID: { + UniqueName: eProcessID, + Spec: &pipepb.FunctionSpec{ + Urn: urns.TransformProcessSizedElements, + Payload: pardoPayload, + }, + Inputs: newInputs, + Outputs: t.GetOutputs(), + EnvironmentId: t.GetEnvironmentId(), + }, + } + + return &pipepb.Components{ + Coders: coders, + Pcollections: pcols, + Transforms: tforms, + }, t.GetSubtransforms() +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/handlerunner.go b/sdks/go/pkg/beam/runners/prism/internal/handlerunner.go new file mode 100644 index 000000000000..e841620625e9 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/handlerunner.go @@ -0,0 +1,298 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
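// For illustration only: the component IDs the ParDo preparer above would
// synthesize for a hypothetical SDF transform with tid = "e5" (the tid and the
// element/restriction coder IDs are invented; the naming scheme itself is the
// one used by PrepareTransform):
//
//	coders:       "ce5size"              float64 size
//	              "ce5kv_ele_rest"       KV<element, restriction>
//	              "ce5kv_ele_rest_size"  KV<KV<element, restriction>, float64>
//	pcollections: "ne5_pwr", "ne5_splitnsized"
//	transforms:   "ee5_pwr"              PAIR_WITH_RESTRICTION
//	              "ee5_splitnsize"       SPLIT_AND_SIZE_RESTRICTIONS
//	              "ee5_processandsplit"  PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS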
+
+package internal
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "reflect"
+ "sort"
+
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
+ pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker"
+ "golang.org/x/exp/slog"
+ "google.golang.org/protobuf/encoding/prototext"
+ "google.golang.org/protobuf/proto"
+)
+
+// This file retains the logic for the runner transform handler.
+
+// RunnerCharacteristic holds the configuration for runner-based transforms,
+// such as GBKs and Flattens.
+type RunnerCharacteristic struct {
+ SDKFlatten bool // Sets whether we should force an SDK side flatten.
+ SDKGBK bool // Sets whether the GBK should be handled by the SDK, if possible.
+}
+
+func Runner(config any) *runner {
+ return &runner{config: config.(RunnerCharacteristic)}
+}
+
+// runner represents an instance of the runner transform handler.
+type runner struct {
+ config RunnerCharacteristic
+}
+
+// ConfigURN returns the name for the runner handler in the configuration file.
+func (*runner) ConfigURN() string {
+ return "runner"
+}
+
+func (*runner) ConfigCharacteristic() reflect.Type {
+ return reflect.TypeOf((*RunnerCharacteristic)(nil)).Elem()
+}
+
+var _ transformExecuter = (*runner)(nil)
+
+func (*runner) ExecuteUrns() []string {
+ return []string{urns.TransformFlatten, urns.TransformGBK}
+}
+
+// ExecuteWith returns what environment the transform should execute in.
+func (h *runner) ExecuteWith(t *pipepb.PTransform) string {
+ urn := t.GetSpec().GetUrn()
+ if urn == urns.TransformFlatten && !h.config.SDKFlatten {
+ return ""
+ }
+ if urn == urns.TransformGBK && !h.config.SDKGBK {
+ return ""
+ }
+ return t.GetEnvironmentId()
+}
+
+// ExecuteTransform handles special processing with respect to runner-specific transforms.
+func (h *runner) ExecuteTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components, watermark mtime.Time, inputData [][]byte) *worker.B {
+ urn := t.GetSpec().GetUrn()
+ var data [][]byte
+ var onlyOut string
+ for _, out := range t.GetOutputs() {
+ onlyOut = out
+ }
+
+ switch urn {
+ case urns.TransformFlatten:
+ // Already done and collated.
+ data = inputData
+
+ case urns.TransformGBK:
+ ws := windowingStrategy(comps, tid)
+ kvc := onlyInputCoderForTransform(comps, tid)
+
+ coders := map[string]*pipepb.Coder{}
+
+ // TODO assert this is a KV. It's probably fine, but we should fail explicitly if it isn't.
+ wcID := lpUnknownCoders(ws.GetWindowCoderId(), coders, comps.GetCoders())
+ kcID := lpUnknownCoders(kvc.GetComponentCoderIds()[0], coders, comps.GetCoders())
+ ecID := lpUnknownCoders(kvc.GetComponentCoderIds()[1], coders, comps.GetCoders())
+ reconcileCoders(coders, comps.GetCoders())
+
+ wc := coders[wcID]
+ kc := coders[kcID]
+ ec := coders[ecID]
+
+ data = append(data, gbkBytes(ws, wc, kc, ec, inputData, coders, watermark))
+ if len(data[0]) == 0 {
+ panic("no data for GBK")
+ }
+ default:
+ panic(fmt.Sprintf("unimplemented runner transform[%v]", urn))
+ }
+
+ // To avoid conflicts with these single transform
+ // bundles, we suffix the transform IDs.
+ var localID string + for key := range t.GetOutputs() { + localID = key + } + + if localID == "" { + panic(fmt.Sprintf("bad transform: %v", prototext.Format(t))) + } + output := engine.TentativeData{} + for _, d := range data { + output.WriteData(onlyOut, d) + } + + dataID := tid + "_" + localID // The ID from which the consumer will read from. + b := &worker.B{ + InputTransformID: dataID, + SinkToPCollection: map[string]string{ + dataID: onlyOut, + }, + OutputData: output, + } + return b +} + +// windowingStrategy sources the transform's windowing strategy from a single parallel input. +func windowingStrategy(comps *pipepb.Components, tid string) *pipepb.WindowingStrategy { + t := comps.GetTransforms()[tid] + var inputPColID string + for _, pcolID := range t.GetInputs() { + inputPColID = pcolID + } + pcol := comps.GetPcollections()[inputPColID] + return comps.GetWindowingStrategies()[pcol.GetWindowingStrategyId()] +} + +// gbkBytes re-encodes gbk inputs in a gbk result. +func gbkBytes(ws *pipepb.WindowingStrategy, wc, kc, vc *pipepb.Coder, toAggregate [][]byte, coders map[string]*pipepb.Coder, watermark mtime.Time) []byte { + var outputTime func(typex.Window, mtime.Time) mtime.Time + switch ws.GetOutputTime() { + case pipepb.OutputTime_END_OF_WINDOW: + outputTime = func(w typex.Window, et mtime.Time) mtime.Time { + return w.MaxTimestamp() + } + default: + // TODO need to correct session logic if output time is different. + panic(fmt.Sprintf("unsupported OutputTime behavior: %v", ws.GetOutputTime())) + } + wDec, wEnc := makeWindowCoders(wc) + + type keyTime struct { + key []byte + w typex.Window + time mtime.Time + values [][]byte + } + // Map windows to a map of keys to a map of keys to time. + // We ultimately emit the window, the key, the time, and the iterable of elements, + // all contained in the final value. + windows := map[typex.Window]map[string]keyTime{} + + kd := pullDecoder(kc, coders) + vd := pullDecoder(vc, coders) + + // Right, need to get the key coder, and the element coder. + // Cus I'll need to pull out anything the runner knows how to deal with. + // And repeat. + for _, data := range toAggregate { + // Parse out each element's data, and repeat. + buf := bytes.NewBuffer(data) + for { + ws, tm, _, err := exec.DecodeWindowedValueHeader(wDec, buf) + if err == io.EOF { + break + } + if err != nil { + panic(fmt.Sprintf("can't decode windowed value header with %v: %v", wc, err)) + } + + keyByt := kd(buf) + key := string(keyByt) + value := vd(buf) + for _, w := range ws { + ft := outputTime(w, tm) + wk, ok := windows[w] + if !ok { + wk = make(map[string]keyTime) + windows[w] = wk + } + kt := wk[key] + kt.time = ft + kt.key = keyByt + kt.w = w + kt.values = append(kt.values, value) + wk[key] = kt + } + } + } + + // If the strategy is session windows, then we need to get all the windows, sort them + // and see which ones need to be merged together. + if ws.GetWindowFn().GetUrn() == urns.WindowFnSession { + slog.Debug("sorting by session window") + session := &pipepb.SessionWindowsPayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(ws.GetWindowFn().GetPayload(), session); err != nil { + panic("unable to decode SessionWindowsPayload") + } + gapSize := mtime.Time(session.GetGapSize().AsDuration()) + + ordered := make([]window.IntervalWindow, 0, len(windows)) + for k := range windows { + ordered = append(ordered, k.(window.IntervalWindow)) + } + // Use a decreasing sort (latest to earliest) so we can correct + // the output timestamp to the new end of window immeadiately. 
+ // TODO need to correct this if output time is different. + sort.Slice(ordered, func(i, j int) bool { + return ordered[i].MaxTimestamp() > ordered[j].MaxTimestamp() + }) + + cur := ordered[0] + sessionData := windows[cur] + for _, iw := range ordered[1:] { + // If they overlap, then we merge the data. + if iw.End+gapSize < cur.Start { + // Start a new session. + windows[cur] = sessionData + cur = iw + sessionData = windows[iw] + continue + } + // Extend the session + cur.Start = iw.Start + toMerge := windows[iw] + delete(windows, iw) + for k, kt := range toMerge { + skt := sessionData[k] + skt.key = kt.key + skt.w = cur + skt.values = append(skt.values, kt.values...) + sessionData[k] = skt + } + } + } + // Everything's aggregated! + // Time to turn things into a windowed KV> + + var buf bytes.Buffer + for _, w := range windows { + for _, kt := range w { + exec.EncodeWindowedValueHeader( + wEnc, + []typex.Window{kt.w}, + kt.time, + typex.NoFiringPane(), + &buf, + ) + buf.Write(kt.key) + coder.EncodeInt32(int32(len(kt.values)), &buf) + for _, value := range kt.values { + buf.Write(value) + } + } + } + return buf.Bytes() +} + +func onlyInputCoderForTransform(comps *pipepb.Components, tid string) *pipepb.Coder { + t := comps.GetTransforms()[tid] + var inputPColID string + for _, pcolID := range t.GetInputs() { + inputPColID = pcolID + } + pcol := comps.GetPcollections()[inputPColID] + return comps.GetCoders()[pcol.GetCoderId()] +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go new file mode 100644 index 000000000000..e66def5b0fe8 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
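The gbkBytes encoding above writes, per key and window, a windowed-value header, the key bytes, an int32 element count, and then the value bytes back to back. A sketch of the matching read side, reusing this file's imports and its func(io.Reader) []byte style of pulled decoders; the coder.DecodeInt32 call is an assumed counterpart to the EncodeInt32 used above.

// decodeGBKOutput is an illustrative reader for the buffer built by gbkBytes:
// header, key, count, then count raw values.
func decodeGBKOutput(buf *bytes.Buffer, wDec exec.WindowDecoder, kd, vd func(io.Reader) []byte) {
	for {
		_, _, _, err := exec.DecodeWindowedValueHeader(wDec, buf)
		if err == io.EOF {
			return
		}
		if err != nil {
			panic(fmt.Sprintf("bad windowed value header: %v", err))
		}
		key := kd(buf)
		n, err := coder.DecodeInt32(buf) // assumed counterpart to EncodeInt32
		if err != nil {
			panic(fmt.Sprintf("bad element count: %v", err))
		}
		for i := int32(0); i < n; i++ {
			_ = vd(buf) // each grouped value, in encoding order
		}
		_ = key
	}
}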
+ +package jobservices + +import ( + "fmt" + "io" + + jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" + "golang.org/x/exp/slog" +) + +func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingService_ReverseArtifactRetrievalServiceServer) error { + in, err := stream.Recv() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + job := s.jobs[in.GetStagingToken()] + + envs := job.Pipeline.GetComponents().GetEnvironments() + for _, env := range envs { + for _, dep := range env.GetDependencies() { + slog.Debug("GetArtifact start", + slog.Group("dep", + slog.String("urn", dep.GetTypeUrn()), + slog.String("payload", string(dep.GetTypePayload())))) + stream.Send(&jobpb.ArtifactRequestWrapper{ + Request: &jobpb.ArtifactRequestWrapper_GetArtifact{ + GetArtifact: &jobpb.GetArtifactRequest{ + Artifact: dep, + }, + }, + }) + var count int + for { + in, err := stream.Recv() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + if in.IsLast { + slog.Debug("GetArtifact finish", + slog.Group("dep", + slog.String("urn", dep.GetTypeUrn()), + slog.String("payload", string(dep.GetTypePayload()))), + slog.Int("bytesReceived", count)) + break + } + // Here's where we go through each environment's artifacts. + // We do nothing with them. + switch req := in.GetResponse().(type) { + case *jobpb.ArtifactResponseWrapper_GetArtifactResponse: + count += len(req.GetArtifactResponse.GetData()) + case *jobpb.ArtifactResponseWrapper_ResolveArtifactResponse: + err := fmt.Errorf("unexpected ResolveArtifactResponse to GetArtifact: %v", in.GetResponse()) + slog.Error("GetArtifact failure", err) + return err + } + } + } + } + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go new file mode 100644 index 000000000000..95b1ce12af93 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package jobservices handles services necessary WRT handling jobs from +// SDKs. Nominally this is the entry point for most users, and a job's +// external interactions outside of pipeline execution. +// +// This includes handling receiving, staging, and provisioning artifacts, +// and orchestrating external workers, such as for loopback mode. +// +// Execution of jobs is abstracted away to an execute function specified +// at server construction time. 
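Given the package shape described above, here is a minimal usage sketch of the job services server, assuming only the NewServer, Serve, Stop, Endpoint, and Job state helpers defined later in this package; the execute callback body is hypothetical.

package jobservices

func exampleServerWiring() {
	srv := NewServer(0, func(j *Job) { // port 0 asks the OS for a free port
		// State updates are drained by whoever watches the job's message stream.
		j.Start()
		j.Running()
		// ... execute j.Pipeline here ...
		j.Done()
	})
	go srv.Serve()
	defer srv.Stop()
	// SDK clients submit jobs against srv.Endpoint() via the Job Management API.
	_ = srv.Endpoint()
}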
+package jobservices + +import ( + "context" + "fmt" + "sort" + "strings" + "sync/atomic" + + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "golang.org/x/exp/slog" + "google.golang.org/protobuf/types/known/structpb" +) + +var capabilities = map[string]struct{}{ + urns.RequirementSplittableDoFn: {}, +} + +// TODO, move back to main package, and key off of executor handlers? +// Accept whole pipeline instead, and look at every PTransform too. +func isSupported(requirements []string) error { + var unsupported []string + for _, req := range requirements { + if _, ok := capabilities[req]; !ok { + unsupported = append(unsupported, req) + } + } + if len(unsupported) > 0 { + sort.Strings(unsupported) + return fmt.Errorf("local runner doesn't support the following required features: %v", strings.Join(unsupported, ",")) + } + return nil +} + +// Job is an interface to the job services for executing pipelines. +// It allows the executor to communicate status, messages, and metrics +// back to callers of the Job Management API. +type Job struct { + key string + jobName string + + Pipeline *pipepb.Pipeline + options *structpb.Struct + + // Management side concerns. + msgChan chan string + state atomic.Value // jobpb.JobState_Enum + stateChan chan jobpb.JobState_Enum + + // Context used to terminate this job. + RootCtx context.Context + CancelFn context.CancelFunc + + metrics metricsStore +} + +func (j *Job) ContributeMetrics(payloads *fnpb.ProcessBundleResponse) { + j.metrics.ContributeMetrics(payloads) +} + +func (j *Job) String() string { + return fmt.Sprintf("%v[%v]", j.key, j.jobName) +} + +func (j *Job) LogValue() slog.Value { + return slog.GroupValue( + slog.String("key", j.key), + slog.String("name", j.jobName)) +} + +func (j *Job) SendMsg(msg string) { + j.msgChan <- msg +} + +// Start indicates that the job is preparing to execute. +func (j *Job) Start() { + j.stateChan <- jobpb.JobState_STARTING +} + +// Running indicates that the job is executing. +func (j *Job) Running() { + j.stateChan <- jobpb.JobState_RUNNING +} + +// Done indicates that the job completed successfully. +func (j *Job) Done() { + j.stateChan <- jobpb.JobState_DONE +} + +// Failed indicates that the job completed unsuccessfully. +func (j *Job) Failed() { + j.stateChan <- jobpb.JobState_FAILED +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go new file mode 100644 index 000000000000..af6c8c71a1d9 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package jobservices + +import ( + "context" + "fmt" + "sync/atomic" + + jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "golang.org/x/exp/slog" +) + +func (s *Server) nextId() string { + v := atomic.AddUint32(&s.index, 1) + return fmt.Sprintf("job-%03d", v) +} + +func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jobpb.PrepareJobResponse, error) { + s.mu.Lock() + defer s.mu.Unlock() + + // Since jobs execute in the background, they should not be tied to a request's context. + rootCtx, cancelFn := context.WithCancel(context.Background()) + job := &Job{ + key: s.nextId(), + Pipeline: req.GetPipeline(), + jobName: req.GetJobName(), + options: req.GetPipelineOptions(), + + msgChan: make(chan string, 100), + stateChan: make(chan jobpb.JobState_Enum, 1), + RootCtx: rootCtx, + CancelFn: cancelFn, + } + + // Queue initial state of the job. + job.state.Store(jobpb.JobState_STOPPED) + job.stateChan <- job.state.Load().(jobpb.JobState_Enum) + + if err := isSupported(job.Pipeline.GetRequirements()); err != nil { + slog.Error("unable to run job", err, slog.String("jobname", req.GetJobName())) + return nil, err + } + s.jobs[job.key] = job + return &jobpb.PrepareJobResponse{ + PreparationId: job.key, + StagingSessionToken: job.key, + ArtifactStagingEndpoint: &pipepb.ApiServiceDescriptor{ + Url: s.Endpoint(), + }, + }, nil +} + +func (s *Server) Run(ctx context.Context, req *jobpb.RunJobRequest) (*jobpb.RunJobResponse, error) { + s.mu.Lock() + job := s.jobs[req.GetPreparationId()] + s.mu.Unlock() + + // Bring up a background goroutine to allow the job to continue processing. + go s.execute(job) + + return &jobpb.RunJobResponse{ + JobId: job.key, + }, nil +} + +// GetMessageStream subscribes to a stream of state changes and messages from the job +func (s *Server) GetMessageStream(req *jobpb.JobMessagesRequest, stream jobpb.JobService_GetMessageStreamServer) error { + s.mu.Lock() + job := s.jobs[req.GetJobId()] + s.mu.Unlock() + + for { + select { + case msg := <-job.msgChan: + stream.Send(&jobpb.JobMessagesResponse{ + Response: &jobpb.JobMessagesResponse_MessageResponse{ + MessageResponse: &jobpb.JobMessage{ + MessageText: msg, + Importance: jobpb.JobMessage_JOB_MESSAGE_BASIC, + }, + }, + }) + + case state, ok := <-job.stateChan: + // TODO: Don't block job execution if WaitForCompletion isn't being run. + // The state channel means the job may only execute if something is observing + // the message stream, as the send on the state or message channel may block + // once full. + // Not a problem for tests or short lived batch, but would be hazardous for + // asynchronous jobs. + + // Channel is closed, so the job must be done. + if !ok { + state = jobpb.JobState_DONE + } + job.state.Store(state) + stream.Send(&jobpb.JobMessagesResponse{ + Response: &jobpb.JobMessagesResponse_StateResponse{ + StateResponse: &jobpb.JobStateEvent{ + State: state, + }, + }, + }) + switch state { + case jobpb.JobState_CANCELLED, jobpb.JobState_DONE, jobpb.JobState_DRAINED, jobpb.JobState_FAILED, jobpb.JobState_UPDATED: + // Reached terminal state. + return nil + } + } + } + +} + +// GetJobMetrics Fetch metrics for a given job. 
+func (s *Server) GetJobMetrics(ctx context.Context, req *jobpb.GetJobMetricsRequest) (*jobpb.GetJobMetricsResponse, error) { + j := s.getJob(req.GetJobId()) + if j == nil { + return nil, fmt.Errorf("GetJobMetrics: unknown jobID: %v", req.GetJobId()) + } + return &jobpb.GetJobMetricsResponse{ + Metrics: &jobpb.MetricResults{ + Attempted: j.metrics.Results(tentative), + Committed: j.metrics.Results(committed), + }, + }, nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics.go index 1dc0723e3af9..39936bae72f1 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics.go @@ -330,8 +330,6 @@ func (m *distributionInt64) accumulate(pyld []byte) error { Min: ordMin(m.dist.Min, dist.Min), Max: ordMax(m.dist.Max, dist.Max), } - fmt.Println("dist", dist) - fmt.Println("m.dist", dist) return nil } diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics_test.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics_test.go index d06d6774828e..e0346731f300 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/metrics_test.go @@ -23,7 +23,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder" fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" - "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" "github.com/google/go-cmp/cmp" "google.golang.org/protobuf/proto" @@ -73,7 +72,7 @@ func Test_metricsStore_ContributeMetrics(t *testing.T) { // TODO convert input to non-legacy metrics once we support, and then delete these. input [][]*pipepb.MonitoringInfo - want []*pipeline_v1.MonitoringInfo + want []*pipepb.MonitoringInfo }{ { name: "int64Sum", diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go new file mode 100644 index 000000000000..2f88293c1dab --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jobservices + +import ( + "fmt" + "net" + "sync" + + jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" + "golang.org/x/exp/slog" + "google.golang.org/grpc" +) + +type Server struct { + jobpb.UnimplementedJobServiceServer + jobpb.UnimplementedArtifactStagingServiceServer + + // Server management + lis net.Listener + server *grpc.Server + + // Job Management + mu sync.Mutex + index uint32 + jobs map[string]*Job + + // execute defines how a job is executed. 
+ execute func(*Job) +} + +// NewServer acquires the indicated port. +func NewServer(port int, execute func(*Job)) *Server { + lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + panic(fmt.Sprintf("failed to listen: %v", err)) + } + s := &Server{ + lis: lis, + jobs: make(map[string]*Job), + execute: execute, + } + slog.Info("Serving JobManagement", slog.String("endpoint", s.Endpoint())) + var opts []grpc.ServerOption + s.server = grpc.NewServer(opts...) + jobpb.RegisterJobServiceServer(s.server, s) + jobpb.RegisterArtifactStagingServiceServer(s.server, s) + return s +} + +func (s *Server) getJob(id string) *Job { + s.mu.Lock() + defer s.mu.Unlock() + return s.jobs[id] +} + +func (s *Server) Endpoint() string { + return s.lis.Addr().String() +} + +// Serve serves on the started listener. Blocks. +func (s *Server) Serve() { + s.server.Serve(s.lis) +} + +// Stop the GRPC server. +func (s *Server) Stop() { + s.server.GracefulStop() +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go new file mode 100644 index 000000000000..2223f030ce1d --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jobservices + +import ( + "context" + "sync" + "testing" + + jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "google.golang.org/protobuf/encoding/prototext" +) + +// TestServer_Lifecycle validates that a server can start and stop. +func TestServer_Lifecycle(t *testing.T) { + undertest := NewServer(0, func(j *Job) { + t.Fatalf("unexpected call to execute: %v", j) + }) + + go undertest.Serve() + + undertest.Stop() +} + +// Validates that a job can start and stop. 
+func TestServer_JobLifecycle(t *testing.T) { + var called sync.WaitGroup + called.Add(1) + undertest := NewServer(0, func(j *Job) { + called.Done() + }) + ctx := context.Background() + + wantPipeline := &pipepb.Pipeline{ + Requirements: []string{urns.RequirementSplittableDoFn}, + } + wantName := "testJob" + + resp, err := undertest.Prepare(ctx, &jobpb.PrepareJobRequest{ + Pipeline: wantPipeline, + JobName: wantName, + }) + if err != nil { + t.Fatalf("server.Prepare() = %v, want nil", err) + } + + if got := resp.GetPreparationId(); got == "" { + t.Fatalf("server.Prepare() = returned empty preparation ID, want non-empty: %v", prototext.Format(resp)) + } + + runResp, err := undertest.Run(ctx, &jobpb.RunJobRequest{ + PreparationId: resp.GetPreparationId(), + }) + if err != nil { + t.Fatalf("server.Run() = %v, want nil", err) + } + if got := runResp.GetJobId(); got == "" { + t.Fatalf("server.Run() = returned empty preparation ID, want non-empty") + } + // If execute is never called, this doesn't unblock and timesout. + called.Wait() + t.Log("success!") + // Nothing to cleanup because we didn't start the server. +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/preprocess.go b/sdks/go/pkg/beam/runners/prism/internal/preprocess.go new file mode 100644 index 000000000000..8769a05d38f4 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/preprocess.go @@ -0,0 +1,148 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "sort" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/pipelinex" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "golang.org/x/exp/maps" + "golang.org/x/exp/slog" +) + +// transformPreparer is an interface for handling different urns in the preprocessor +// largely for exchanging transforms for others, to be added to the complete set of +// components in the pipeline. +type transformPreparer interface { + // PrepareUrns returns the Beam URNs that this handler deals with for preprocessing. + PrepareUrns() []string + // PrepareTransform takes a PTransform proto and returns a set of new Components, and a list of + // transformIDs leaves to remove and ignore from graph processing. + PrepareTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components) (*pipepb.Components, []string) +} + +// preprocessor retains configuration for preprocessing the +// graph, such as special handling for lifted combiners or +// other configuration. 
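As the interface comment above describes, a preparer swaps one transform for a set of replacement components. A skeletal example of one, for a hypothetical URN "my_composite"; the built-in preparers and the test preparer later in this patch follow the same shape.

type myPreparer struct{}

func (*myPreparer) PrepareUrns() []string { return []string{"my_composite"} }

func (*myPreparer) PrepareTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components) (*pipepb.Components, []string) {
	// Return the replacement sub-components, and list the composite's
	// subtransforms so they're dropped from topological handling.
	return &pipepb.Components{ /* new transforms, pcollections, coders */ }, t.GetSubtransforms()
}

// Registration: the preprocessor routes each matching URN to its preparer.
//   pre := newPreprocessor([]transformPreparer{&myPreparer{}})
//   stages := pre.preProcessGraph(comps)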
+type preprocessor struct { + transformPreparers map[string]transformPreparer +} + +func newPreprocessor(preps []transformPreparer) *preprocessor { + preparers := map[string]transformPreparer{} + for _, prep := range preps { + for _, urn := range prep.PrepareUrns() { + preparers[urn] = prep + } + } + return &preprocessor{ + transformPreparers: preparers, + } +} + +// preProcessGraph takes the graph and preprocesses for consumption in bundles. +// The output is the topological sort of the transform ids. +// +// These are how transforms are related in graph form, but not the specific bundles themselves, which will come later. +// +// Handles awareness of composite transforms and similar. Ultimately, after this point +// the graph stops being a hypergraph, with composite transforms being treated as +// "leaves" downstream as needed. +// +// This is where Combines become lifted (if it makes sense, or is configured), and similar behaviors. +func (p *preprocessor) preProcessGraph(comps *pipepb.Components) []*stage { + ts := comps.GetTransforms() + + // TODO move this out of this part of the pre-processor? + leaves := map[string]struct{}{} + ignore := map[string]struct{}{} + for tid, t := range ts { + if _, ok := ignore[tid]; ok { + continue + } + + spec := t.GetSpec() + if spec == nil { + // Most composites don't have specs. + slog.Debug("transform is missing a spec", + slog.Group("transform", slog.String("ID", tid), slog.String("name", t.GetUniqueName()))) + continue + } + + // Composite Transforms basically means needing to remove the "leaves" from the + // handling set, and producing the new sub component transforms. The top level + // composite should have enough information to produce the new sub transforms. + // In particular, the inputs and outputs need to all be connected and matched up + // so the topological sort still works out. + h := p.transformPreparers[spec.GetUrn()] + if h == nil { + + // If there's an unknown urn, and it's not composite, simply add it to the leaves. + if len(t.GetSubtransforms()) == 0 { + leaves[tid] = struct{}{} + } else { + slog.Info("composite transform has unknown urn", + slog.Group("transform", slog.String("ID", tid), + slog.String("name", t.GetUniqueName()), + slog.String("urn", spec.GetUrn()))) + } + continue + } + + subs, toRemove := h.PrepareTransform(tid, t, comps) + + // Clear out unnecessary leaves from this composite for topological sort handling. + for _, key := range toRemove { + ignore[key] = struct{}{} + delete(leaves, key) + } + + // ts should be a clone, so we should be able to add new transforms into the map. + for tid, t := range subs.GetTransforms() { + leaves[tid] = struct{}{} + ts[tid] = t + } + for cid, c := range subs.GetCoders() { + comps.GetCoders()[cid] = c + } + for nid, n := range subs.GetPcollections() { + comps.GetPcollections()[nid] = n + } + // It's unlikely for these to change, but better to handle them now, to save a headache later. + for wid, w := range subs.GetWindowingStrategies() { + comps.GetWindowingStrategies()[wid] = w + } + for envid, env := range subs.GetEnvironments() { + comps.GetEnvironments()[envid] = env + } + } + + // Extract URNs for the given transform. 
+ + keptLeaves := maps.Keys(leaves) + sort.Strings(keptLeaves) + topological := pipelinex.TopologicalSort(ts, keptLeaves) + slog.Debug("topological transform ordering", topological) + + var stages []*stage + for _, tid := range topological { + stages = append(stages, &stage{ + transforms: []string{tid}, + }) + } + return stages +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/preprocess_test.go b/sdks/go/pkg/beam/runners/prism/internal/preprocess_test.go new file mode 100644 index 000000000000..add69a7c7679 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/preprocess_test.go @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "testing" + + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/google/go-cmp/cmp" + "google.golang.org/protobuf/testing/protocmp" +) + +func Test_preprocessor_preProcessGraph(t *testing.T) { + tests := []struct { + name string + input *pipepb.Components + + wantComponents *pipepb.Components + wantStages []*stage + }{ + { + name: "noPreparer", + input: &pipepb.Components{ + Transforms: map[string]*pipepb.PTransform{ + "e1": { + UniqueName: "e1", + Spec: &pipepb.FunctionSpec{ + Urn: "defaultUrn", + }, + }, + }, + }, + + wantStages: []*stage{{transforms: []string{"e1"}}}, + wantComponents: &pipepb.Components{ + Transforms: map[string]*pipepb.PTransform{ + "e1": { + UniqueName: "e1", + Spec: &pipepb.FunctionSpec{ + Urn: "defaultUrn", + }, + }, + }, + }, + }, { + name: "preparer", + input: &pipepb.Components{ + Transforms: map[string]*pipepb.PTransform{ + "e1": { + UniqueName: "e1", + Spec: &pipepb.FunctionSpec{ + Urn: "test_urn", + }, + }, + }, + // Initialize maps because they always are by proto unmarshallers. 
+ Pcollections: map[string]*pipepb.PCollection{}, + WindowingStrategies: map[string]*pipepb.WindowingStrategy{}, + Coders: map[string]*pipepb.Coder{}, + Environments: map[string]*pipepb.Environment{}, + }, + + wantStages: []*stage{{transforms: []string{"e1_early"}}, {transforms: []string{"e1_late"}}}, + wantComponents: &pipepb.Components{ + Transforms: map[string]*pipepb.PTransform{ + // Original is always kept + "e1": { + UniqueName: "e1", + Spec: &pipepb.FunctionSpec{ + Urn: "test_urn", + }, + }, + "e1_early": { + UniqueName: "e1_early", + Spec: &pipepb.FunctionSpec{ + Urn: "defaultUrn", + }, + Outputs: map[string]string{"i0": "pcol1"}, + EnvironmentId: "env1", + }, + "e1_late": { + UniqueName: "e1_late", + Spec: &pipepb.FunctionSpec{ + Urn: "defaultUrn", + }, + Inputs: map[string]string{"i0": "pcol1"}, + EnvironmentId: "env1", + }, + }, + Pcollections: map[string]*pipepb.PCollection{ + "pcol1": { + UniqueName: "pcol1", + CoderId: "coder1", + WindowingStrategyId: "ws1", + }, + }, + Coders: map[string]*pipepb.Coder{ + "coder1": {Spec: &pipepb.FunctionSpec{Urn: "coder1"}}, + }, + WindowingStrategies: map[string]*pipepb.WindowingStrategy{ + "ws1": {WindowCoderId: "global"}, + }, + Environments: map[string]*pipepb.Environment{ + "env1": {Urn: "env1"}, + }, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pre := newPreprocessor([]transformPreparer{&testPreparer{}}) + + gotStages := pre.preProcessGraph(test.input) + if diff := cmp.Diff(test.wantStages, gotStages, cmp.AllowUnexported(stage{})); diff != "" { + t.Errorf("preProcessGraph(%q) stages diff (-want,+got)\n%v", test.name, diff) + } + + if diff := cmp.Diff(test.input, test.wantComponents, protocmp.Transform()); diff != "" { + t.Errorf("preProcessGraph(%q) components diff (-want,+got)\n%v", test.name, diff) + } + }) + } +} + +type testPreparer struct{} + +func (p *testPreparer) PrepareUrns() []string { + return []string{"test_urn"} +} + +func (p *testPreparer) PrepareTransform(tid string, t *pipepb.PTransform, comps *pipepb.Components) (*pipepb.Components, []string) { + return &pipepb.Components{ + Transforms: map[string]*pipepb.PTransform{ + "e1_early": { + UniqueName: "e1_early", + Spec: &pipepb.FunctionSpec{ + Urn: "defaultUrn", + }, + Outputs: map[string]string{"i0": "pcol1"}, + EnvironmentId: "env1", + }, + "e1_late": { + UniqueName: "e1_late", + Spec: &pipepb.FunctionSpec{ + Urn: "defaultUrn", + }, + Inputs: map[string]string{"i0": "pcol1"}, + EnvironmentId: "env1", + }, + }, + Pcollections: map[string]*pipepb.PCollection{ + "pcol1": { + UniqueName: "pcol1", + CoderId: "coder1", + WindowingStrategyId: "ws1", + }, + }, + Coders: map[string]*pipepb.Coder{ + "coder1": {Spec: &pipepb.FunctionSpec{Urn: "coder1"}}, + }, + WindowingStrategies: map[string]*pipepb.WindowingStrategy{ + "ws1": {WindowCoderId: "global"}, + }, + Environments: map[string]*pipepb.Environment{ + "env1": {Urn: "env1"}, + }, + }, []string{"e1"} +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/separate_test.go b/sdks/go/pkg/beam/runners/prism/internal/separate_test.go new file mode 100644 index 000000000000..2e96651bfe98 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/separate_test.go @@ -0,0 +1,595 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "context" + "fmt" + "net" + "net/http" + "net/rpc" + "sync" + "testing" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/util/reflectx" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/rtrackers/offsetrange" + "github.com/apache/beam/sdks/v2/go/pkg/beam/register" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" + "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats" + "golang.org/x/exp/slog" +) + +// separate_test.go retains structures and tests to ensure the runner can +// perform separation, and terminate checkpoints. + +// Global variable, so only one is registered with the OS. +var ws = &Watchers{} + +// TestSeparation validates that the runner is able to split +// elements in time and space. Beam has a few mechanisms to +// do this. +// +// First is channel splits, where a slowly processing +// bundle might have it's remaining buffered elements truncated +// so they can be processed by another bundle, +// possibly simultaneously. +// +// Second is sub element splitting, where a single element +// in an SDF might be split into smaller restrictions. +// +// Third with Checkpointing or ProcessContinuations, +// a User DoFn may decide to defer processing of an element +// until later, permitting a bundle to terminate earlier, +// delaying processing. +// +// All these may be tested locally or in process with a small +// server the DoFns can connect to. This can then indicate which +// elements, or positions are considered "sentinels". +// +// When a sentinel is to be processed, instead the DoFn blocks. +// The goal for Splitting tests is to succeed only when all +// sentinels are blocking waiting to be processed. +// This indicates the runner has "separated" the sentinels, hence +// the name "separation harness tests". +// +// Delayed Process Continuations can be similiarly tested, +// as this emulates external processing servers anyway. +// It's much simpler though, as the request is to determine if +// a given element should be delayed or not. This could be used +// for arbitrarily complex splitting patterns, as desired. 
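Stripped of the channel fan-out used by sepHarnessBase below, the watcher protocol a blocking DoFn follows is just two RPCs, sketched here against the Watchers methods and Args type defined in this file; the polling interval is arbitrary.

// waitForSeparation is an illustrative client: register this sentinel as
// blocked, then poll until the watcher reports every sentinel is blocked.
func waitForSeparation(addr string, watcherID int) error {
	client, err := rpc.DialHTTP("tcp", addr)
	if err != nil {
		return err
	}
	var ignored bool
	if err := client.Call("Watchers.Block", &Args{WatcherID: watcherID}, &ignored); err != nil {
		return err
	}
	for {
		var unblocked bool
		if err := client.Call("Watchers.Check", &Args{WatcherID: watcherID}, &unblocked); err != nil {
			return err
		}
		if unblocked {
			return nil
		}
		time.Sleep(time.Second)
	}
}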
+func TestSeparation(t *testing.T) { + initRunner(t) + + ws.initRPCServer() + + tests := []struct { + name string + pipeline func(s beam.Scope) + metrics func(t *testing.T, pr beam.PipelineResult) + }{ + { + name: "ProcessContinuations_combine_globalWindow", + pipeline: func(s beam.Scope) { + count := 10 + imp := beam.Impulse(s) + out := beam.ParDo(s, &sepHarnessSdfStream{ + Base: sepHarnessBase{ + WatcherID: ws.newWatcher(3), + Sleep: time.Second, + IsSentinelEncoded: beam.EncodedFunc{Fn: reflectx.MakeFunc(allSentinel)}, + LocalService: ws.serviceAddress, + }, + RestSize: int64(count), + }, imp) + passert.Count(s, out, "global num ints", count) + }, + }, { + name: "ProcessContinuations_stepped_combine_globalWindow", + pipeline: func(s beam.Scope) { + count := 10 + imp := beam.Impulse(s) + out := beam.ParDo(s, &singleStepSdfStream{ + Sleep: time.Second, + RestSize: int64(count), + }, imp) + passert.Count(s, out, "global stepped num ints", count) + sum := beam.ParDo(s, dofn2x1, imp, beam.SideInput{Input: out}) + beam.ParDo(s, &int64Check{Name: "stepped", Want: []int{45}}, sum) + }, + }, { + name: "ProcessContinuations_stepped_combine_fixedWindow", + pipeline: func(s beam.Scope) { + elms, mod := 1000, 10 + count := int(elms / mod) + imp := beam.Impulse(s) + out := beam.ParDo(s, &eventtimeSDFStream{ + Sleep: time.Second, + RestSize: int64(elms), + Mod: int64(mod), + Fixed: 1, + }, imp) + windowed := beam.WindowInto(s, window.NewFixedWindows(time.Second*10), out) + sum := stats.Sum(s, windowed) + // We expect each window to be processed ASAP, and produced one + // at a time, with the same results. + beam.ParDo(s, &int64Check{Name: "single", Want: []int{55}}, sum) + // But we need to receive the expected number of identical results + gsum := beam.WindowInto(s, window.NewGlobalWindows(), sum) + passert.Count(s, gsum, "total sums", count) + }, + }, + } + + // TODO: Channel Splits + // TODO: SubElement/dynamic splits. + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + p, s := beam.NewPipelineWithRoot() + test.pipeline(s) + pr, err := executeWithT(context.Background(), t, p) + if err != nil { + t.Fatal(err) + } + if test.metrics != nil { + test.metrics(t, pr) + } + }) + } +} + +func init() { + register.Function1x1(allSentinel) +} + +// allSentinel indicates that all elements are sentinels. +func allSentinel(v beam.T) bool { + return true +} + +// Watcher is an instance of the counters. +type watcher struct { + id int + mu sync.Mutex + sentinelCount, sentinelCap int +} + +func (w *watcher) LogValue() slog.Value { + return slog.GroupValue( + slog.Int("id", w.id), + slog.Int("sentinelCount", w.sentinelCount), + slog.Int("sentinelCap", w.sentinelCap), + ) +} + +// Watchers is a "net/rpc" service. +type Watchers struct { + mu sync.Mutex + nextID int + lookup map[int]*watcher + serviceOnce sync.Once + serviceAddress string +} + +// Args is the set of parameters to the watchers RPC methods. +type Args struct { + WatcherID int +} + +// Block is called once per sentinel, to indicate it will block +// until all sentinels are blocked. +func (ws *Watchers) Block(args *Args, _ *bool) error { + ws.mu.Lock() + defer ws.mu.Unlock() + w, ok := ws.lookup[args.WatcherID] + if !ok { + return fmt.Errorf("no watcher with id %v", args.WatcherID) + } + w.mu.Lock() + w.sentinelCount++ + w.mu.Unlock() + return nil +} + +// Check returns whether the sentinels are unblocked or not. 
+func (ws *Watchers) Check(args *Args, unblocked *bool) error {
+ ws.mu.Lock()
+ defer ws.mu.Unlock()
+ w, ok := ws.lookup[args.WatcherID]
+ if !ok {
+ return fmt.Errorf("no watcher with id %v", args.WatcherID)
+ }
+ w.mu.Lock()
+ *unblocked = w.sentinelCount >= w.sentinelCap
+ w.mu.Unlock()
+ slog.Debug("sentinel target for watcher%d is %d/%d. unblocked=%v", args.WatcherID, w.sentinelCount, w.sentinelCap, *unblocked)
+ return nil
+}
+
+// Delay returns whether the sentinels should delay.
+// This increments the sentinel count, and requests a delay while the count is below the cap.
+// Intended to validate ProcessContinuation behavior.
+func (ws *Watchers) Delay(args *Args, delay *bool) error {
+ ws.mu.Lock()
+ defer ws.mu.Unlock()
+ w, ok := ws.lookup[args.WatcherID]
+ if !ok {
+ return fmt.Errorf("no watcher with id %v", args.WatcherID)
+ }
+ w.mu.Lock()
+ w.sentinelCount++
+ // Delay as long as the sentinel count is under the cap.
+ *delay = w.sentinelCount < w.sentinelCap
+ w.mu.Unlock()
+ slog.Debug("Delay: sentinel target", "watcher", w, slog.Bool("delay", *delay))
+ return nil
+}
+
+func (ws *Watchers) initRPCServer() {
+ ws.serviceOnce.Do(func() {
+ l, err := net.Listen("tcp", ":0")
+ if err != nil {
+ panic(err)
+ }
+ rpc.Register(ws)
+ rpc.HandleHTTP()
+ go http.Serve(l, nil)
+ ws.serviceAddress = l.Addr().String()
+ })
+}
+
+// newWatcher registers a new watcher, starting the RPC server if needed, to manage
+// state for watching for sentinels across local processes.
+func (ws *Watchers) newWatcher(sentinelCap int) int {
+ ws.mu.Lock()
+ defer ws.mu.Unlock()
+ ws.initRPCServer()
+ if ws.lookup == nil {
+ ws.lookup = map[int]*watcher{}
+ }
+ w := &watcher{id: ws.nextID, sentinelCap: sentinelCap}
+ ws.nextID++
+ ws.lookup[w.id] = w
+ return w.id
+}
+
+// sepHarnessBase contains fields and functions that are shared by all
+// versions of the separation harness.
+type sepHarnessBase struct {
+ WatcherID int
+ Sleep time.Duration
+ IsSentinelEncoded beam.EncodedFunc
+ LocalService string
+}
+
+// One connection per binary.
+var (
+ sepClientOnce sync.Once
+ sepClient *rpc.Client
+ sepClientMu sync.Mutex
+ sepWaitMap map[int]chan struct{}
+)
+
+func (fn *sepHarnessBase) setup() error {
+ sepClientMu.Lock()
+ defer sepClientMu.Unlock()
+ sepClientOnce.Do(func() {
+ client, err := rpc.DialHTTP("tcp", fn.LocalService)
+ if err != nil {
+ slog.Error("failed to dial sentinels server", err, slog.String("endpoint", fn.LocalService))
+ panic(fmt.Sprintf("dialing sentinels server %v: %v", fn.LocalService, err))
+ }
+ sepClient = client
+ sepWaitMap = map[int]chan struct{}{}
+ })
+
+ // Check if there's already a local channel for this id, and if not
+ // start a watcher goroutine to poll and unblock the harness when
+ // the expected number of sentinels is reached.
+ if _, ok := sepWaitMap[fn.WatcherID]; ok {
+ return nil
+ }
+ // We need a channel to block on for this watcherID.
+ // We use a channel instead of a wait group since the finished
+ // count is hosted in a different process.
+ c := make(chan struct{})
+ sepWaitMap[fn.WatcherID] = c
+ go func(id int, c chan struct{}) {
+ for {
+ time.Sleep(time.Second * 1) // Check counts every second.
+ sepClientMu.Lock()
+ var unblock bool
+ err := sepClient.Call("Watchers.Check", &Args{WatcherID: id}, &unblock)
+ if err != nil {
+ slog.Error("Watchers.Check: sentinels server error", err, slog.String("endpoint", fn.LocalService))
+ panic("sentinel server error")
+ }
+ if unblock {
+ close(c) // unblock all the local waiters.
+ slog.Debug("sentinel target for watcher, unblocking", slog.Int("watcherID", id)) + sepClientMu.Unlock() + return + } + slog.Debug("sentinel target for watcher not met", slog.Int("watcherID", id)) + sepClientMu.Unlock() + } + }(fn.WatcherID, c) + return nil +} + +func (fn *sepHarnessBase) block() { + sepClientMu.Lock() + var ignored bool + err := sepClient.Call("Watchers.Block", &Args{WatcherID: fn.WatcherID}, &ignored) + if err != nil { + slog.Error("Watchers.Block error", err, slog.String("endpoint", fn.LocalService)) + panic(err) + } + c := sepWaitMap[fn.WatcherID] + sepClientMu.Unlock() + + // Block until the watcher closes the channel. + <-c +} + +// delay inform the DoFn whether or not to return a delayed Processing continuation for this position. +func (fn *sepHarnessBase) delay() bool { + sepClientMu.Lock() + defer sepClientMu.Unlock() + var delay bool + err := sepClient.Call("Watchers.Delay", &Args{WatcherID: fn.WatcherID}, &delay) + if err != nil { + slog.Error("Watchers.Delay error", err) + panic(err) + } + return delay +} + +// sepHarness is a simple DoFn that blocks when reaching a sentinel. +// It's useful for testing blocks on channel splits. +type sepHarness struct { + Base sepHarnessBase +} + +func (fn *sepHarness) Setup() error { + return fn.Base.setup() +} + +func (fn *sepHarness) ProcessElement(v beam.T) beam.T { + if fn.Base.IsSentinelEncoded.Fn.Call([]any{v})[0].(bool) { + slog.Debug("blocking on sentinel", slog.Any("sentinel", v)) + fn.Base.block() + slog.Debug("unblocking from sentinel", slog.Any("sentinel", v)) + } else { + time.Sleep(fn.Base.Sleep) + } + return v +} + +type sepHarnessSdf struct { + Base sepHarnessBase + RestSize int64 +} + +func (fn *sepHarnessSdf) Setup() error { + return fn.Base.setup() +} + +func (fn *sepHarnessSdf) CreateInitialRestriction(v beam.T) offsetrange.Restriction { + return offsetrange.Restriction{Start: 0, End: fn.RestSize} +} + +func (fn *sepHarnessSdf) SplitRestriction(v beam.T, r offsetrange.Restriction) []offsetrange.Restriction { + return r.EvenSplits(2) +} + +func (fn *sepHarnessSdf) RestrictionSize(v beam.T, r offsetrange.Restriction) float64 { + return r.Size() +} + +func (fn *sepHarnessSdf) CreateTracker(r offsetrange.Restriction) *sdf.LockRTracker { + return sdf.NewLockRTracker(offsetrange.NewTracker(r)) +} + +func (fn *sepHarnessSdf) ProcessElement(rt *sdf.LockRTracker, v beam.T, emit func(beam.T)) { + i := rt.GetRestriction().(offsetrange.Restriction).Start + for rt.TryClaim(i) { + if fn.Base.IsSentinelEncoded.Fn.Call([]any{i, v})[0].(bool) { + slog.Debug("blocking on sentinel", slog.Group("sentinel", slog.Any("value", v), slog.Int64("pos", i))) + fn.Base.block() + slog.Debug("unblocking from sentinel", slog.Group("sentinel", slog.Any("value", v), slog.Int64("pos", i))) + } else { + time.Sleep(fn.Base.Sleep) + } + emit(v) + i++ + } +} + +func init() { + register.DoFn1x1[beam.T, beam.T]((*sepHarness)(nil)) + register.DoFn3x0[*sdf.LockRTracker, beam.T, func(beam.T)]((*sepHarnessSdf)(nil)) + register.Emitter1[beam.T]() + register.DoFn3x1[*sdf.LockRTracker, beam.T, func(beam.T), sdf.ProcessContinuation]((*sepHarnessSdfStream)(nil)) + register.DoFn3x1[*sdf.LockRTracker, beam.T, func(int64), sdf.ProcessContinuation]((*singleStepSdfStream)(nil)) + register.Emitter1[int64]() + register.DoFn4x1[*CWE, *sdf.LockRTracker, beam.T, func(beam.EventTime, int64), sdf.ProcessContinuation]((*eventtimeSDFStream)(nil)) + register.Emitter2[beam.EventTime, int64]() +} + +type sepHarnessSdfStream struct { + Base sepHarnessBase + RestSize int64 
+} + +func (fn *sepHarnessSdfStream) Setup() error { + return fn.Base.setup() +} + +func (fn *sepHarnessSdfStream) CreateInitialRestriction(v beam.T) offsetrange.Restriction { + return offsetrange.Restriction{Start: 0, End: fn.RestSize} +} + +func (fn *sepHarnessSdfStream) SplitRestriction(v beam.T, r offsetrange.Restriction) []offsetrange.Restriction { + return r.EvenSplits(2) +} + +func (fn *sepHarnessSdfStream) RestrictionSize(v beam.T, r offsetrange.Restriction) float64 { + return r.Size() +} + +func (fn *sepHarnessSdfStream) CreateTracker(r offsetrange.Restriction) *sdf.LockRTracker { + return sdf.NewLockRTracker(offsetrange.NewTracker(r)) +} + +func (fn *sepHarnessSdfStream) ProcessElement(rt *sdf.LockRTracker, v beam.T, emit func(beam.T)) sdf.ProcessContinuation { + if fn.Base.IsSentinelEncoded.Fn.Call([]any{v})[0].(bool) { + if fn.Base.delay() { + slog.Debug("delaying on sentinel", slog.Group("sentinel", slog.Any("value", v))) + return sdf.ResumeProcessingIn(fn.Base.Sleep) + } + slog.Debug("cleared to process sentinel", slog.Group("sentinel", slog.Any("value", v))) + } + r := rt.GetRestriction().(offsetrange.Restriction) + i := r.Start + for rt.TryClaim(i) { + emit(v) + i++ + } + return sdf.StopProcessing() +} + +// singleStepSdfStream only emits a single position at a time then sleeps. +// Stops when a restriction of size 0 is provided. +type singleStepSdfStream struct { + RestSize int64 + Sleep time.Duration +} + +func (fn *singleStepSdfStream) Setup() error { + return nil +} + +func (fn *singleStepSdfStream) CreateInitialRestriction(v beam.T) offsetrange.Restriction { + return offsetrange.Restriction{Start: 0, End: fn.RestSize} +} + +func (fn *singleStepSdfStream) SplitRestriction(v beam.T, r offsetrange.Restriction) []offsetrange.Restriction { + return r.EvenSplits(2) +} + +func (fn *singleStepSdfStream) RestrictionSize(v beam.T, r offsetrange.Restriction) float64 { + return r.Size() +} + +func (fn *singleStepSdfStream) CreateTracker(r offsetrange.Restriction) *sdf.LockRTracker { + return sdf.NewLockRTracker(offsetrange.NewTracker(r)) +} + +func (fn *singleStepSdfStream) ProcessElement(rt *sdf.LockRTracker, v beam.T, emit func(int64)) sdf.ProcessContinuation { + r := rt.GetRestriction().(offsetrange.Restriction) + i := r.Start + if r.Size() < 1 { + slog.Debug("size 0 restriction, stoping to process sentinel", slog.Any("value", v)) + return sdf.StopProcessing() + } + slog.Debug("emitting element to restriction", slog.Any("value", v), slog.Group("restriction", + slog.Any("value", v), + slog.Float64("size", r.Size()), + slog.Int64("pos", i), + )) + if rt.TryClaim(i) { + emit(i) + } + return sdf.ResumeProcessingIn(fn.Sleep) +} + +type eventtimeSDFStream struct { + RestSize, Mod, Fixed int64 + Sleep time.Duration +} + +func (fn *eventtimeSDFStream) Setup() error { + return nil +} + +func (fn *eventtimeSDFStream) CreateInitialRestriction(v beam.T) offsetrange.Restriction { + return offsetrange.Restriction{Start: 0, End: fn.RestSize} +} + +func (fn *eventtimeSDFStream) SplitRestriction(v beam.T, r offsetrange.Restriction) []offsetrange.Restriction { + // No split + return []offsetrange.Restriction{r} +} + +func (fn *eventtimeSDFStream) RestrictionSize(v beam.T, r offsetrange.Restriction) float64 { + return r.Size() +} + +func (fn *eventtimeSDFStream) CreateTracker(r offsetrange.Restriction) *sdf.LockRTracker { + return sdf.NewLockRTracker(offsetrange.NewTracker(r)) +} + +func (fn *eventtimeSDFStream) ProcessElement(_ *CWE, rt *sdf.LockRTracker, v beam.T, emit func(beam.EventTime, 
int64)) sdf.ProcessContinuation { + r := rt.GetRestriction().(offsetrange.Restriction) + i := r.Start + if r.Size() < 1 { + slog.Debug("size 0 restriction, stoping to process sentinel", slog.Any("value", v)) + return sdf.StopProcessing() + } + slog.Debug("emitting element to restriction", slog.Any("value", v), slog.Group("restriction", + slog.Any("value", v), + slog.Float64("size", r.Size()), + slog.Int64("pos", i), + )) + if rt.TryClaim(i) { + timestamp := mtime.FromMilliseconds(int64((i + 1) * 1000)).Subtract(10 * time.Millisecond) + v := (i % fn.Mod) + fn.Fixed + emit(timestamp, v) + } + return sdf.ResumeProcessingIn(fn.Sleep) +} + +func (fn *eventtimeSDFStream) InitialWatermarkEstimatorState(_ beam.EventTime, _ offsetrange.Restriction, _ beam.T) int64 { + return int64(mtime.MinTimestamp) +} + +func (fn *eventtimeSDFStream) CreateWatermarkEstimator(initialState int64) *CWE { + return &CWE{Watermark: initialState} +} + +func (fn *eventtimeSDFStream) WatermarkEstimatorState(e *CWE) int64 { + return e.Watermark +} + +type CWE struct { + Watermark int64 // uses int64, since the SDK prevent mtime.Time from serialization. +} + +func (e *CWE) CurrentWatermark() time.Time { + return mtime.Time(e.Watermark).ToTime() +} + +func (e *CWE) ObserveTimestamp(ts time.Time) { + // We add 10 milliseconds to allow window boundaries to + // progress after emitting + e.Watermark = int64(mtime.FromTime(ts.Add(-90 * time.Millisecond))) +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/stage.go b/sdks/go/pkg/beam/runners/prism/internal/stage.go new file mode 100644 index 000000000000..39c3f5ea5ff2 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/stage.go @@ -0,0 +1,400 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "bytes" + "fmt" + "io" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" + "golang.org/x/exp/maps" + "golang.org/x/exp/slog" + "google.golang.org/protobuf/proto" +) + +// stage represents a fused subgraph. +// +// TODO: do we guarantee that they are all +// the same environment at this point, or +// should that be handled later? 
+type stage struct {
+ ID string
+ transforms []string
+
+ envID string
+ exe transformExecuter
+ outputCount int
+ inputTransformID string
+ mainInputPCol string
+ inputInfo engine.PColInfo
+ desc *fnpb.ProcessBundleDescriptor
+ sides []string
+ prepareSides func(b *worker.B, tid string, watermark mtime.Time)
+
+ SinkToPCollection map[string]string
+ OutputsToCoders map[string]engine.PColInfo
+}
+
+func (s *stage) Execute(j *jobservices.Job, wk *worker.W, comps *pipepb.Components, em *engine.ElementManager, rb engine.RunBundle) {
+ tid := s.transforms[0]
+ slog.Debug("Execute: starting bundle", "bundle", rb, slog.String("tid", tid))
+
+ var b *worker.B
+ var send bool
+ inputData := em.InputForBundle(rb, s.inputInfo)
+ switch s.envID {
+ case "": // Runner Transforms
+ // Runner transforms are processed immediately.
+ b = s.exe.ExecuteTransform(tid, comps.GetTransforms()[tid], comps, rb.Watermark, inputData)
+ b.InstID = rb.BundleID
+ slog.Debug("Execute: runner transform", "bundle", rb, slog.String("tid", tid))
+ case wk.ID:
+ send = true
+ b = &worker.B{
+ PBDID: s.ID,
+ InstID: rb.BundleID,
+
+ InputTransformID: s.inputTransformID,
+
+ // TODO Here's where we can split data for processing in multiple bundles.
+ InputData: inputData,
+
+ SinkToPCollection: s.SinkToPCollection,
+ OutputCount: s.outputCount,
+ }
+ b.Init()
+
+ s.prepareSides(b, s.transforms[0], rb.Watermark)
+ default:
+ err := fmt.Errorf("unknown environment[%v]", s.envID)
+ slog.Error("Execute", err)
+ panic(err)
+ }
+
+ if send {
+ slog.Debug("Execute: processing", "bundle", rb)
+ b.ProcessOn(wk) // Blocks until finished.
+ }
+ // Tentative Data is ready, commit it to the main datastore.
+ slog.Debug("Execute: committing data", "bundle", rb, slog.Any("outputsWithData", maps.Keys(b.OutputData.Raw)), slog.Any("outputs", maps.Keys(s.OutputsToCoders)))
+
+ resp := &fnpb.ProcessBundleResponse{}
+ if send {
+ resp = <-b.Resp
+ // Tally metrics immediately so they're available before
+ // pipeline termination.
+ j.ContributeMetrics(resp)
+ }
+ // TODO handle side input data properly.
+ wk.D.Commit(b.OutputData)
+ var residualData [][]byte
+ var minOutputWatermark map[string]mtime.Time
+ for _, rr := range resp.GetResidualRoots() {
+ ba := rr.GetApplication()
+ residualData = append(residualData, ba.GetElement())
+ if len(ba.GetElement()) == 0 {
+ slog.Log(slog.LevelError, "returned empty residual application", "bundle", rb)
+ panic("sdk returned empty residual application")
+ }
+ for col, wm := range ba.GetOutputWatermarks() {
+ if minOutputWatermark == nil {
+ minOutputWatermark = map[string]mtime.Time{}
+ }
+ cur, ok := minOutputWatermark[col]
+ if !ok {
+ cur = mtime.MaxTimestamp
+ }
+ minOutputWatermark[col] = mtime.Min(mtime.FromTime(wm.AsTime()), cur)
+ }
+ }
+ if l := len(residualData); l > 0 {
+ slog.Debug("Execute: bundle returned residual applications", "bundle", rb, slog.Int("numResiduals", l), slog.String("pcollection", s.mainInputPCol))
+ }
+ em.PersistBundle(rb, s.OutputsToCoders, b.OutputData, s.inputInfo, residualData, minOutputWatermark)
+ b.OutputData = engine.TentativeData{} // Clear the data.
+} + +func getSideInputs(t *pipepb.PTransform) (map[string]*pipepb.SideInput, error) { + if t.GetSpec().GetUrn() != urns.TransformParDo { + return nil, nil + } + pardo := &pipepb.ParDoPayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(t.GetSpec().GetPayload(), pardo); err != nil { + return nil, fmt.Errorf("unable to decode ParDoPayload") + } + return pardo.GetSideInputs(), nil +} + +func portFor(wInCid string, wk *worker.W) []byte { + sourcePort := &fnpb.RemoteGrpcPort{ + CoderId: wInCid, + ApiServiceDescriptor: &pipepb.ApiServiceDescriptor{ + Url: wk.Endpoint(), + }, + } + sourcePortBytes, err := proto.Marshal(sourcePort) + if err != nil { + slog.Error("bad port", err, slog.String("endpoint", sourcePort.ApiServiceDescriptor.GetUrl())) + } + return sourcePortBytes +} + +func buildStage(s *stage, tid string, t *pipepb.PTransform, comps *pipepb.Components, wk *worker.W) { + s.inputTransformID = tid + "_source" + + coders := map[string]*pipepb.Coder{} + transforms := map[string]*pipepb.PTransform{ + tid: t, // The Transform to Execute! + } + + sis, err := getSideInputs(t) + if err != nil { + slog.Error("buildStage: getSide Inputs", err, slog.String("transformID", tid)) + panic(err) + } + var inputInfo engine.PColInfo + var sides []string + for local, global := range t.GetInputs() { + // This id is directly used for the source, but this also copies + // coders used by side inputs to the coders map for the bundle, so + // needs to be run for every ID. + wInCid := makeWindowedValueCoder(global, comps, coders) + _, ok := sis[local] + if ok { + sides = append(sides, global) + } else { + // this is the main input + transforms[s.inputTransformID] = sourceTransform(s.inputTransformID, portFor(wInCid, wk), global) + col := comps.GetPcollections()[global] + ed := collectionPullDecoder(col.GetCoderId(), coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + inputInfo = engine.PColInfo{ + GlobalID: global, + WDec: wDec, + WEnc: wEnc, + EDec: ed, + } + } + // We need to process all inputs to ensure we have all input coders, so we must continue. + } + + prepareSides, err := handleSideInputs(t, comps, coders, wk) + if err != nil { + slog.Error("buildStage: handleSideInputs", err, slog.String("transformID", tid)) + panic(err) + } + + // TODO: We need a new logical PCollection to represent the source + // so we can avoid double counting PCollection metrics later. + // But this also means replacing the ID for the input in the bundle. 
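+ // Sketch of the output wiring below (names are illustrative): for a transform
+ // "tid" with outputs {"out0": "pcolA"}, the loop registers a sink transform
+ // keyed "tid_out0" and records sink2Col["tid_out0"] = "pcolA", so data returned
+ // by the SDK can be routed back to its PCollection when the bundle is committed.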
+ sink2Col := map[string]string{} + col2Coders := map[string]engine.PColInfo{} + for local, global := range t.GetOutputs() { + wOutCid := makeWindowedValueCoder(global, comps, coders) + sinkID := tid + "_" + local + col := comps.GetPcollections()[global] + ed := collectionPullDecoder(col.GetCoderId(), coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + sink2Col[sinkID] = global + col2Coders[global] = engine.PColInfo{ + GlobalID: global, + WDec: wDec, + WEnc: wEnc, + EDec: ed, + } + transforms[sinkID] = sinkTransform(sinkID, portFor(wOutCid, wk), global) + } + + reconcileCoders(coders, comps.GetCoders()) + + desc := &fnpb.ProcessBundleDescriptor{ + Id: s.ID, + Transforms: transforms, + WindowingStrategies: comps.GetWindowingStrategies(), + Pcollections: comps.GetPcollections(), + Coders: coders, + StateApiServiceDescriptor: &pipepb.ApiServiceDescriptor{ + Url: wk.Endpoint(), + }, + } + + s.desc = desc + s.outputCount = len(t.Outputs) + s.prepareSides = prepareSides + s.sides = sides + s.SinkToPCollection = sink2Col + s.OutputsToCoders = col2Coders + s.mainInputPCol = inputInfo.GlobalID + s.inputInfo = inputInfo + + wk.Descriptors[s.ID] = s.desc +} + +// handleSideInputs ensures appropriate coders are available to the bundle, and prepares a function to stage the data. +func handleSideInputs(t *pipepb.PTransform, comps *pipepb.Components, coders map[string]*pipepb.Coder, wk *worker.W) (func(b *worker.B, tid string, watermark mtime.Time), error) { + sis, err := getSideInputs(t) + if err != nil { + return nil, err + } + var prepSides []func(b *worker.B, tid string, watermark mtime.Time) + + // Get WindowedValue Coders for the transform's input and output PCollections. + for local, global := range t.GetInputs() { + si, ok := sis[local] + if !ok { + continue // This is the main input. + } + + // this is a side input + switch si.GetAccessPattern().GetUrn() { + case urns.SideInputIterable: + slog.Debug("urnSideInputIterable", + slog.String("sourceTransform", t.GetUniqueName()), + slog.String("local", local), + slog.String("global", global)) + col := comps.GetPcollections()[global] + ed := collectionPullDecoder(col.GetCoderId(), coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + // May be of zero length, but that's OK. Side inputs can be empty. + + global, local := global, local + prepSides = append(prepSides, func(b *worker.B, tid string, watermark mtime.Time) { + data := wk.D.GetAllData(global) + + if b.IterableSideInputData == nil { + b.IterableSideInputData = map[string]map[string]map[typex.Window][][]byte{} + } + if _, ok := b.IterableSideInputData[tid]; !ok { + b.IterableSideInputData[tid] = map[string]map[typex.Window][][]byte{} + } + b.IterableSideInputData[tid][local] = collateByWindows(data, watermark, wDec, wEnc, + func(r io.Reader) [][]byte { + return [][]byte{ed(r)} + }, func(a, b [][]byte) [][]byte { + return append(a, b...) 
+ }) + }) + + case urns.SideInputMultiMap: + slog.Debug("urnSideInputMultiMap", + slog.String("sourceTransform", t.GetUniqueName()), + slog.String("local", local), + slog.String("global", global)) + col := comps.GetPcollections()[global] + + kvc := comps.GetCoders()[col.GetCoderId()] + if kvc.GetSpec().GetUrn() != urns.CoderKV { + return nil, fmt.Errorf("multimap side inputs needs KV coder, got %v", kvc.GetSpec().GetUrn()) + } + + kd := collectionPullDecoder(kvc.GetComponentCoderIds()[0], coders, comps) + vd := collectionPullDecoder(kvc.GetComponentCoderIds()[1], coders, comps) + wDec, wEnc := getWindowValueCoders(comps, col, coders) + + global, local := global, local + prepSides = append(prepSides, func(b *worker.B, tid string, watermark mtime.Time) { + // May be of zero length, but that's OK. Side inputs can be empty. + data := wk.D.GetAllData(global) + if b.MultiMapSideInputData == nil { + b.MultiMapSideInputData = map[string]map[string]map[typex.Window]map[string][][]byte{} + } + if _, ok := b.MultiMapSideInputData[tid]; !ok { + b.MultiMapSideInputData[tid] = map[string]map[typex.Window]map[string][][]byte{} + } + b.MultiMapSideInputData[tid][local] = collateByWindows(data, watermark, wDec, wEnc, + func(r io.Reader) map[string][][]byte { + kb := kd(r) + return map[string][][]byte{ + string(kb): {vd(r)}, + } + }, func(a, b map[string][][]byte) map[string][][]byte { + if len(a) == 0 { + return b + } + for k, vs := range b { + a[k] = append(a[k], vs...) + } + return a + }) + }) + default: + return nil, fmt.Errorf("local input %v (global %v) uses accesspattern %v", local, global, si.GetAccessPattern().GetUrn()) + } + } + return func(b *worker.B, tid string, watermark mtime.Time) { + for _, prep := range prepSides { + prep(b, tid, watermark) + } + }, nil +} + +func sourceTransform(parentID string, sourcePortBytes []byte, outPID string) *pipepb.PTransform { + source := &pipepb.PTransform{ + UniqueName: parentID, + Spec: &pipepb.FunctionSpec{ + Urn: urns.TransformSource, + Payload: sourcePortBytes, + }, + Outputs: map[string]string{ + "i0": outPID, + }, + } + return source +} + +func sinkTransform(sinkID string, sinkPortBytes []byte, inPID string) *pipepb.PTransform { + source := &pipepb.PTransform{ + UniqueName: sinkID, + Spec: &pipepb.FunctionSpec{ + Urn: urns.TransformSink, + Payload: sinkPortBytes, + }, + Inputs: map[string]string{ + "i0": inPID, + }, + } + return source +} + +// collateByWindows takes the data and collates them into window keyed maps. +// Uses generics to consolidate the repetitive window loops. +func collateByWindows[T any](data [][]byte, watermark mtime.Time, wDec exec.WindowDecoder, wEnc exec.WindowEncoder, ed func(io.Reader) T, join func(T, T) T) map[typex.Window]T { + windowed := map[typex.Window]T{} + for _, datum := range data { + inBuf := bytes.NewBuffer(datum) + for { + ws, _, _, err := exec.DecodeWindowedValueHeader(wDec, inBuf) + if err == io.EOF { + break + } + // Get the element out, and window them properly. + e := ed(inBuf) + for _, w := range ws { + windowed[w] = join(windowed[w], e) + } + } + } + return windowed +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/testdofns.go b/sdks/go/pkg/beam/runners/prism/internal/testdofns.go new file mode 100644 index 000000000000..4aa07a46c6f2 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/testdofns.go @@ -0,0 +1,349 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "context" + "fmt" + "sort" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/rtrackers/offsetrange" + "github.com/apache/beam/sdks/v2/go/pkg/beam/log" + "github.com/apache/beam/sdks/v2/go/pkg/beam/register" + "github.com/google/go-cmp/cmp" +) + +// The Test DoFns live outside of the test files to get coverage information on DoFn +// Lifecycle method execution. This inflates binary size, but ensures the runner is +// exercising the expected feature set. +// +// Once there's enough confidence in the runner, we can move these into a dedicated testing +// package along with the pipelines that use them. + +// Registrations should happen in the test files, so the compiler can prune these +// when they are not in use. + +func dofn1(imp []byte, emit func(int64)) { + emit(1) + emit(2) + emit(3) +} + +func dofn1kv(imp []byte, emit func(int64, int64)) { + emit(0, 1) + emit(0, 2) + emit(0, 3) +} + +func dofn1x2(imp []byte, emitA func(int64), emitB func(int64)) { + emitA(1) + emitA(2) + emitA(3) + emitB(4) + emitB(5) + emitB(6) +} + +func dofn1x5(imp []byte, emitA, emitB, emitC, emitD, emitE func(int64)) { + emitA(1) + emitB(2) + emitC(3) + emitD(4) + emitE(5) + emitA(6) + emitB(7) + emitC(8) + emitD(9) + emitE(10) +} + +func dofn2x1(imp []byte, iter func(*int64) bool, emit func(int64)) { + var v, sum, c int64 + for iter(&v) { + fmt.Println("dofn2x1 v", v, " c ", c) + sum += v + c++ + } + fmt.Println("dofn2x1 sum", sum, "count", c) + emit(sum) +} + +func dofn2x2KV(imp []byte, iter func(*string, *int64) bool, emitK func(string), emitV func(int64)) { + var k string + var v, sum int64 + for iter(&k, &v) { + sum += v + emitK(k) + } + emitV(sum) +} + +func dofnMultiMap(key string, lookup func(string) func(*int64) bool, emitK func(string), emitV func(int64)) { + var v, sum int64 + iter := lookup(key) + for iter(&v) { + sum += v + } + emitK(key) + emitV(sum) +} + +func dofn3x1(sum int64, iter1, iter2 func(*int64) bool, emit func(int64)) { + var v int64 + for iter1(&v) { + sum += v + } + for iter2(&v) { + sum += v + } + emit(sum) +} + +// int64Check validates that within a single bundle, +// we received the expected int64 values & sends them downstream. +// +// Invalid pattern for general testing, as it will fail +// on other valid execution patterns, like single element bundles. 
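+//
+// A minimal usage sketch, assuming a scope s and an impulse imp already exist
+// (the name and values are illustrative only):
+//
+//	col := beam.ParDo(s, dofn1, imp)
+//	beam.ParDo(s, &int64Check{Name: "dofn1", Want: []int{1, 2, 3}}, col)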
+type int64Check struct { + Name string + Want []int + got []int +} + +func (fn *int64Check) ProcessElement(v int64, _ func(int64)) { + fn.got = append(fn.got, int(v)) +} + +func (fn *int64Check) FinishBundle(_ func(int64)) error { + sort.Ints(fn.got) + sort.Ints(fn.Want) + if d := cmp.Diff(fn.Want, fn.got); d != "" { + return fmt.Errorf("int64Check[%v] (-want, +got): %v", fn.Name, d) + } + // Clear for subsequent calls. + fn.got = nil + return nil +} + +// stringCheck validates that within a single bundle, +// we received the expected string values. +// Re-emits them downstream. +// +// Invalid pattern for general testing, as it will fail +// on other valid execution patterns, like single element bundles. +type stringCheck struct { + Name string + Want []string + got []string +} + +func (fn *stringCheck) ProcessElement(v string, _ func(string)) { + fn.got = append(fn.got, v) +} + +func (fn *stringCheck) FinishBundle(_ func(string)) error { + sort.Strings(fn.got) + sort.Strings(fn.Want) + if d := cmp.Diff(fn.Want, fn.got); d != "" { + return fmt.Errorf("stringCheck[%v] (-want, +got): %v", fn.Name, d) + } + return nil +} + +func dofn2(v int64, emit func(int64)) { + emit(v + 1) +} + +func dofnKV(imp []byte, emit func(string, int64)) { + emit("a", 1) + emit("b", 2) + emit("a", 3) + emit("b", 4) + emit("a", 5) + emit("b", 6) +} + +func dofnKV2(imp []byte, emit func(int64, string)) { + emit(1, "a") + emit(2, "b") + emit(1, "a") + emit(2, "b") + emit(1, "a") + emit(2, "b") +} + +func dofnGBK(k string, vs func(*int64) bool, emit func(int64)) { + var v, sum int64 + for vs(&v) { + sum += v + } + emit(sum) +} + +func dofnGBK2(k int64, vs func(*string) bool, emit func(string)) { + var v, sum string + for vs(&v) { + sum += v + } + emit(sum) +} + +type testRow struct { + A string + B int64 +} + +func dofnKV3(imp []byte, emit func(testRow, testRow)) { + emit(testRow{"a", 1}, testRow{"a", 1}) +} + +func dofnGBK3(k testRow, vs func(*testRow) bool, emit func(string)) { + var v testRow + vs(&v) + emit(fmt.Sprintf("%v: %v", k, v)) +} + +const ( + ns = "localtest" +) + +func dofnSink(ctx context.Context, _ []byte) { + beam.NewCounter(ns, "sunk").Inc(ctx, 73) +} + +func dofn1Counter(ctx context.Context, _ []byte, emit func(int64)) { + beam.NewCounter(ns, "count").Inc(ctx, 1) +} + +func combineIntSum(a, b int64) int64 { + return a + b +} + +// SourceConfig is a struct containing all the configuration options for a +// synthetic source. It should be created via a SourceConfigBuilder, not by +// directly initializing it (the fields are public to allow encoding). +type SourceConfig struct { + NumElements int64 `json:"num_records" beam:"num_records"` + InitialSplits int64 `json:"initial_splits" beam:"initial_splits"` +} + +// intRangeFn is a splittable DoFn for counting from 1 to N. +type intRangeFn struct{} + +// CreateInitialRestriction creates an offset range restriction representing +// the number of elements to emit. +func (fn *intRangeFn) CreateInitialRestriction(config SourceConfig) offsetrange.Restriction { + return offsetrange.Restriction{ + Start: 0, + End: int64(config.NumElements), + } +} + +// SplitRestriction splits restrictions equally according to the number of +// initial splits specified in SourceConfig. Each restriction output by this +// method will contain at least one element, so the number of splits will not +// exceed the number of elements. 
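+//
+// A small sketch of the expected behavior (split sizes are approximate):
+//
+//	rest := offsetrange.Restriction{Start: 0, End: 9}
+//	rest.EvenSplits(3) // roughly [0, 3), [3, 6), [6, 9)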
+func (fn *intRangeFn) SplitRestriction(config SourceConfig, rest offsetrange.Restriction) (splits []offsetrange.Restriction) {
+ return rest.EvenSplits(int64(config.InitialSplits))
+}
+
+// RestrictionSize outputs the size of the restriction as the number of elements
+// that restriction will output.
+func (fn *intRangeFn) RestrictionSize(_ SourceConfig, rest offsetrange.Restriction) float64 {
+ return rest.Size()
+}
+
+// CreateTracker just creates an offset range restriction tracker for the
+// restriction.
+func (fn *intRangeFn) CreateTracker(rest offsetrange.Restriction) *sdf.LockRTracker {
+ return sdf.NewLockRTracker(offsetrange.NewTracker(rest))
+}
+
+// ProcessElement emits a sequential int64 value for each position it claims from
+// the restriction tracker, shifted up by one so the output range is [1, N].
+func (fn *intRangeFn) ProcessElement(rt *sdf.LockRTracker, config SourceConfig, emit func(int64)) error {
+ for i := rt.GetRestriction().(offsetrange.Restriction).Start; rt.TryClaim(i); i++ {
+ // Add 1 since the restrictions are from [0, N), but we want [1, N].
+ emit(i + 1)
+ }
+ return nil
+}
+
+func init() {
+ register.DoFn3x1[*sdf.LockRTracker, []byte, func(int64), sdf.ProcessContinuation](&selfCheckpointingDoFn{})
+ register.Emitter1[int64]()
+}
+
+type selfCheckpointingDoFn struct{}
+
+// CreateInitialRestriction creates the restriction being used by the SDF. In this case, the range
+// of values produced by the restriction is [Start, End).
+func (fn *selfCheckpointingDoFn) CreateInitialRestriction(_ []byte) offsetrange.Restriction {
+ return offsetrange.Restriction{
+ Start: int64(0),
+ End: int64(10),
+ }
+}
+
+// CreateTracker wraps the given restriction into a LockRTracker type.
+func (fn *selfCheckpointingDoFn) CreateTracker(rest offsetrange.Restriction) *sdf.LockRTracker {
+ return sdf.NewLockRTracker(offsetrange.NewTracker(rest))
+}
+
+// RestrictionSize returns the size of the current restriction.
+func (fn *selfCheckpointingDoFn) RestrictionSize(_ []byte, rest offsetrange.Restriction) float64 {
+ return rest.Size()
+}
+
+// SplitRestriction modifies the offsetrange.Restriction's sized restriction function to produce a size-zero restriction
+// at the end of execution.
+func (fn *selfCheckpointingDoFn) SplitRestriction(_ []byte, rest offsetrange.Restriction) []offsetrange.Restriction {
+ size := int64(3)
+ s := rest.Start
+ var splits []offsetrange.Restriction
+ for e := s + size; e <= rest.End; s, e = e, e+size {
+ splits = append(splits, offsetrange.Restriction{Start: s, End: e})
+ }
+ splits = append(splits, offsetrange.Restriction{Start: s, End: rest.End})
+ return splits
+}
+
+// ProcessElement continually gets the start position of the restriction and emits it as an int64 value before checkpointing.
+// This causes the restriction to be split after the claimed work and produce no primary roots.
+func (fn *selfCheckpointingDoFn) ProcessElement(rt *sdf.LockRTracker, _ []byte, emit func(int64)) sdf.ProcessContinuation {
+ position := rt.GetRestriction().(offsetrange.Restriction).Start
+
+ for {
+ if rt.TryClaim(position) {
+ // Successful claim, emit the value and move on.
+ emit(position)
+ position++
+ } else if rt.GetError() != nil || rt.IsDone() {
+ // Stop processing on error or completion.
+ if err := rt.GetError(); err != nil {
+ log.Errorf(context.Background(), "error in restriction tracker, got %v", err)
+ }
+ return sdf.StopProcessing()
+ } else {
+ // Resume later.
+ return sdf.ResumeProcessingIn(5 * time.Second) + } + } +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/testdofns_test.go b/sdks/go/pkg/beam/runners/prism/internal/testdofns_test.go new file mode 100644 index 000000000000..3596c40f0dcd --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/testdofns_test.go @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf" + "github.com/apache/beam/sdks/v2/go/pkg/beam/register" +) + +// Test DoFns are registered in the test file, to allow them to be pruned +// by the compiler outside of test use. +func init() { + register.Function2x0(dofn1) + register.Function2x0(dofn1kv) + register.Function3x0(dofn1x2) + register.Function6x0(dofn1x5) + register.Function3x0(dofn2x1) + register.Function4x0(dofn2x2KV) + register.Function4x0(dofnMultiMap) + register.Iter1[int64]() + register.Function4x0(dofn3x1) + register.Iter2[string, int64]() + register.Emitter1[string]() + + register.Function2x0(dofn2) + register.Function2x0(dofnKV) + register.Function2x0(dofnKV2) + register.Function3x0(dofnGBK) + register.Function3x0(dofnGBK2) + register.DoFn2x0[int64, func(int64)]((*int64Check)(nil)) + register.DoFn2x0[string, func(string)]((*stringCheck)(nil)) + register.Function2x0(dofnKV3) + register.Function3x0(dofnGBK3) + register.Function3x0(dofn1Counter) + register.Function2x0(dofnSink) + + register.Function2x1(combineIntSum) + + register.DoFn3x1[*sdf.LockRTracker, SourceConfig, func(int64), error]((*intRangeFn)(nil)) + register.Emitter1[int64]() + register.Emitter2[int64, int64]() +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go b/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go index 09ee59c969a3..035ab3c0727f 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go +++ b/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package urn handles extracting urns from all the protos. +// Package urns handles extracting urns from all the protos. package urns import ( diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go new file mode 100644 index 000000000000..f6fbf1293f47 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package worker + +import ( + "sync" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" + "golang.org/x/exp/slog" +) + +// B represents an extant ProcessBundle instruction sent to an SDK worker. +// Generally manipulated by another package to interact with a worker. +type B struct { + InstID string // ID for the instruction processing this bundle. + PBDID string // ID for the ProcessBundleDescriptor + + // InputTransformID is data being sent to the SDK. + InputTransformID string + InputData [][]byte // Data specifically for this bundle. + + // TODO change to a single map[tid] -> map[input] -> map[window] -> struct { Iter data, MultiMap data } instead of all maps. + // IterableSideInputData is a map from transformID, to inputID, to window, to data. + IterableSideInputData map[string]map[string]map[typex.Window][][]byte + // MultiMapSideInputData is a map from transformID, to inputID, to window, to data key, to data values. + MultiMapSideInputData map[string]map[string]map[typex.Window]map[string][][]byte + + // OutputCount is the number of data outputs this bundle has. + // We need to see this many closed data channels before the bundle is complete. + OutputCount int + // dataWait is how we determine if a bundle is finished, by waiting for each of + // a Bundle's DataSinks to produce their last output. + // After this point we can "commit" the bundle's output for downstream use. + dataWait sync.WaitGroup + OutputData engine.TentativeData + Resp chan *fnpb.ProcessBundleResponse + + SinkToPCollection map[string]string + + // TODO: Metrics for this bundle, can be handled after the fact. +} + +// Init initializes the bundle's internal state for waiting on all +// data and for relaying a response back. +func (b *B) Init() { + // We need to see final data signals that match the number of + // outputs the stage this bundle executes posesses + b.dataWait.Add(b.OutputCount) + b.Resp = make(chan *fnpb.ProcessBundleResponse, 1) +} + +func (b *B) LogValue() slog.Value { + return slog.GroupValue( + slog.String("ID", b.InstID), + slog.String("stage", b.PBDID)) +} + +// ProcessOn executes the given bundle on the given W, blocking +// until all data is complete. +// +// Assumes the bundle is initialized (all maps are non-nil, and data waitgroup is set, response channel initialized) +// Assumes the bundle descriptor is already registered with the W. +// +// While this method mostly manipulates a W, putting it on a B avoids mixing the workers +// public GRPC APIs up with local calls. +func (b *B) ProcessOn(wk *W) { + wk.mu.Lock() + wk.bundles[b.InstID] = b + wk.mu.Unlock() + + slog.Debug("processing", "bundle", b, "worker", wk) + + // Tell the SDK to start processing the bundle. 
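+ // (The request below is queued on the worker's shared Control stream; the
+ // Control handler routes the SDK's matching ProcessBundleResponse back onto
+ // b.Resp once a response with this bundle's instruction ID arrives.)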
+ wk.InstReqs <- &fnpb.InstructionRequest{ + InstructionId: b.InstID, + Request: &fnpb.InstructionRequest_ProcessBundle{ + ProcessBundle: &fnpb.ProcessBundleRequest{ + ProcessBundleDescriptorId: b.PBDID, + }, + }, + } + + // TODO: make batching decisions. + for i, d := range b.InputData { + wk.DataReqs <- &fnpb.Elements{ + Data: []*fnpb.Elements_Data{ + { + InstructionId: b.InstID, + TransformId: b.InputTransformID, + Data: d, + IsLast: i+1 == len(b.InputData), + }, + }, + } + } + + slog.Debug("waiting on data", "bundle", b) + b.dataWait.Wait() // Wait until data is ready. +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/bundle_test.go b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle_test.go new file mode 100644 index 000000000000..154306c3f6ba --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle_test.go @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package worker + +import ( + "bytes" + "sync" + "testing" +) + +func TestBundle_ProcessOn(t *testing.T) { + wk := New("test") + b := &B{ + InstID: "testInst", + PBDID: "testPBDID", + OutputCount: 1, + InputData: [][]byte{{1, 2, 3}}, + } + b.Init() + var completed sync.WaitGroup + completed.Add(1) + go func() { + b.ProcessOn(wk) + completed.Done() + }() + b.dataWait.Done() + gotData := <-wk.DataReqs + if got, want := gotData.GetData()[0].GetData(), []byte{1, 2, 3}; !bytes.EqualFold(got, want) { + t.Errorf("ProcessOn(): data not sent; got %v, want %v", got, want) + } + + gotInst := <-wk.InstReqs + if got, want := gotInst.GetInstructionId(), b.InstID; got != want { + t.Errorf("ProcessOn(): bad instruction ID; got %v, want %v", got, want) + } + if got, want := gotInst.GetProcessBundle().GetProcessBundleDescriptorId(), b.PBDID; got != want { + t.Errorf("ProcessOn(): bad process bundle descriptor ID; got %v, want %v", got, want) + } +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go new file mode 100644 index 000000000000..8458ce39e116 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go @@ -0,0 +1,421 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package worker handles interactions with SDK side workers, representing +// the worker services, communicating with those services, and SDK environments. +package worker + +import ( + "bytes" + "context" + "fmt" + "io" + "net" + "sync" + "sync/atomic" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" + "golang.org/x/exp/slog" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/encoding/prototext" +) + +// A W manages worker environments, sending them work +// that they're able to execute, and manages the server +// side handlers for FnAPI RPCs. +type W struct { + fnpb.UnimplementedBeamFnControlServer + fnpb.UnimplementedBeamFnDataServer + fnpb.UnimplementedBeamFnStateServer + fnpb.UnimplementedBeamFnLoggingServer + + ID string + + // Server management + lis net.Listener + server *grpc.Server + + // These are the ID sources + inst, bund uint64 + + InstReqs chan *fnpb.InstructionRequest + DataReqs chan *fnpb.Elements + + mu sync.Mutex + bundles map[string]*B // Bundles keyed by InstructionID + Descriptors map[string]*fnpb.ProcessBundleDescriptor // Stages keyed by PBDID + + D *DataService +} + +// New starts the worker server components of FnAPI Execution. +func New(id string) *W { + lis, err := net.Listen("tcp", ":0") + if err != nil { + panic(fmt.Sprintf("failed to listen: %v", err)) + } + var opts []grpc.ServerOption + wk := &W{ + ID: id, + lis: lis, + server: grpc.NewServer(opts...), + + InstReqs: make(chan *fnpb.InstructionRequest, 10), + DataReqs: make(chan *fnpb.Elements, 10), + + bundles: make(map[string]*B), + Descriptors: make(map[string]*fnpb.ProcessBundleDescriptor), + + D: &DataService{}, + } + slog.Info("Serving Worker components", slog.String("endpoint", wk.Endpoint())) + fnpb.RegisterBeamFnControlServer(wk.server, wk) + fnpb.RegisterBeamFnDataServer(wk.server, wk) + fnpb.RegisterBeamFnLoggingServer(wk.server, wk) + fnpb.RegisterBeamFnStateServer(wk.server, wk) + return wk +} + +func (wk *W) Endpoint() string { + return wk.lis.Addr().String() +} + +// Serve serves on the started listener. Blocks. +func (wk *W) Serve() { + wk.server.Serve(wk.lis) +} + +func (wk *W) String() string { + return "worker[" + wk.ID + "]" +} + +func (wk *W) LogValue() slog.Value { + return slog.GroupValue( + slog.String("ID", wk.ID), + slog.String("endpoint", wk.Endpoint()), + ) +} + +// Stop the GRPC server. 
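+//
+// A minimal lifecycle sketch (the worker id "test_env" is illustrative only):
+//
+//	wk := New("test_env")
+//	go wk.Serve()
+//	defer wk.Stop()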
+func (wk *W) Stop() { + slog.Debug("stopping", "worker", wk) + close(wk.InstReqs) + close(wk.DataReqs) + wk.server.Stop() + wk.lis.Close() + slog.Debug("stopped", "worker", wk) +} + +func (wk *W) NextInst() string { + return fmt.Sprintf("inst%03d", atomic.AddUint64(&wk.inst, 1)) +} + +func (wk *W) NextStage() string { + return fmt.Sprintf("stage%03d", atomic.AddUint64(&wk.bund, 1)) +} + +// TODO set logging level. +var minsev = fnpb.LogEntry_Severity_DEBUG + +// Logging relates SDK worker messages back to the job that spawned them. +// Messages are received from the SDK, +func (wk *W) Logging(stream fnpb.BeamFnLogging_LoggingServer) error { + for { + in, err := stream.Recv() + if err == io.EOF { + return nil + } + if err != nil { + slog.Error("logging.Recv", err, "worker", wk) + return err + } + for _, l := range in.GetLogEntries() { + if l.Severity >= minsev { + // TODO: Connect to the associated Job for this worker instead of + // logging locally for SDK side logging. + slog.Log(toSlogSev(l.GetSeverity()), l.GetMessage(), + slog.String(slog.SourceKey, l.GetLogLocation()), + slog.Time(slog.TimeKey, l.GetTimestamp().AsTime()), + "worker", wk, + ) + } + } + } +} + +func toSlogSev(sev fnpb.LogEntry_Severity_Enum) slog.Level { + switch sev { + case fnpb.LogEntry_Severity_TRACE: + return slog.Level(-8) + case fnpb.LogEntry_Severity_DEBUG: + return slog.LevelDebug // -4 + case fnpb.LogEntry_Severity_INFO: + return slog.LevelInfo // 0 + case fnpb.LogEntry_Severity_NOTICE: + return slog.Level(2) + case fnpb.LogEntry_Severity_WARN: + return slog.LevelWarn // 4 + case fnpb.LogEntry_Severity_ERROR: + return slog.LevelError // 8 + case fnpb.LogEntry_Severity_CRITICAL: + return slog.Level(10) + } + return slog.LevelInfo +} + +func (wk *W) GetProcessBundleDescriptor(ctx context.Context, req *fnpb.GetProcessBundleDescriptorRequest) (*fnpb.ProcessBundleDescriptor, error) { + desc, ok := wk.Descriptors[req.GetProcessBundleDescriptorId()] + if !ok { + return nil, fmt.Errorf("descriptor %v not found", req.GetProcessBundleDescriptorId()) + } + return desc, nil +} + +// Control relays instructions to SDKs and back again, coordinated via unique instructionIDs. +// +// Requests come from the runner, and are sent to the client in the SDK. +func (wk *W) Control(ctrl fnpb.BeamFnControl_ControlServer) error { + done := make(chan bool) + go func() { + for { + resp, err := ctrl.Recv() + if err == io.EOF { + slog.Debug("ctrl.Recv finished; marking done", "worker", wk) + done <- true // means stream is finished + return + } + if err != nil { + switch status.Code(err) { + case codes.Canceled: // Might ignore this all the time instead. + slog.Error("ctrl.Recv Canceled", err, "worker", wk) + done <- true // means stream is finished + return + default: + slog.Error("ctrl.Recv failed", err, "worker", wk) + panic(err) + } + } + + // TODO: Do more than assume these are ProcessBundleResponses. + wk.mu.Lock() + if b, ok := wk.bundles[resp.GetInstructionId()]; ok { + // TODO. Better pipeline error handling. 
+ if resp.Error != "" { + slog.Log(slog.LevelError, "ctrl.Recv pipeline error", slog.ErrorKey, resp.GetError()) + panic(resp.GetError()) + } + b.Resp <- resp.GetProcessBundle() + } else { + slog.Debug("ctrl.Recv: %v", resp) + } + wk.mu.Unlock() + } + }() + + for req := range wk.InstReqs { + ctrl.Send(req) + } + slog.Debug("ctrl.Send finished waiting on done") + <-done + slog.Debug("Control done") + return nil +} + +// Data relays elements and timer bytes to SDKs and back again, coordinated via +// ProcessBundle instructionIDs, and receiving input transforms. +// +// Data is multiplexed on a single stream for all active bundles on a worker. +func (wk *W) Data(data fnpb.BeamFnData_DataServer) error { + go func() { + for { + resp, err := data.Recv() + if err == io.EOF { + return + } + if err != nil { + switch status.Code(err) { + case codes.Canceled: + slog.Error("data.Recv Canceled", err, "worker", wk) + return + default: + slog.Error("data.Recv failed", err, "worker", wk) + panic(err) + } + } + wk.mu.Lock() + for _, d := range resp.GetData() { + b, ok := wk.bundles[d.GetInstructionId()] + if !ok { + slog.Info("data.Recv for unknown bundle", "response", resp) + continue + } + colID := b.SinkToPCollection[d.GetTransformId()] + + // There might not be data, eg. for side inputs, so we need to reconcile this elsewhere for + // downstream side inputs. + if len(d.GetData()) > 0 { + b.OutputData.WriteData(colID, d.GetData()) + } + if d.GetIsLast() { + b.dataWait.Done() + } + } + wk.mu.Unlock() + } + }() + + for req := range wk.DataReqs { + if err := data.Send(req); err != nil { + slog.Log(slog.LevelDebug, "data.Send error", slog.ErrorKey, err) + } + } + return nil +} + +// State relays elements and timer bytes to SDKs and back again, coordinated via +// ProcessBundle instructionIDs, and receiving input transforms. +// +// State requests come from SDKs, and the runner responds. +func (wk *W) State(state fnpb.BeamFnState_StateServer) error { + responses := make(chan *fnpb.StateResponse) + go func() { + // This go routine creates all responses to state requests from the worker + // so we want to close the State handler when it's all done. + defer close(responses) + for { + req, err := state.Recv() + if err == io.EOF { + return + } + if err != nil { + switch status.Code(err) { + case codes.Canceled: + slog.Error("state.Recv Canceled", err, "worker", wk) + return + default: + slog.Error("state.Recv failed", err, "worker", wk) + panic(err) + } + } + switch req.GetRequest().(type) { + case *fnpb.StateRequest_Get: + // TODO: move data handling to be pcollection based. 
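+ // Sketch of the lookup below (ids are illustrative): a request such as
+ //   state_key { iterable_side_input { transform_id: "t0" side_input_id: "i0" } }
+ // with an empty window key resolves to
+ // b.IterableSideInputData["t0"]["i0"][window.GlobalWindow{}], and the matching
+ // elements are concatenated into a single Get response payload.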
+ b := wk.bundles[req.GetInstructionId()] + key := req.GetStateKey() + slog.Debug("StateRequest_Get", prototext.Format(req), "bundle", b) + + var data [][]byte + switch key.GetType().(type) { + case *fnpb.StateKey_IterableSideInput_: + ikey := key.GetIterableSideInput() + wKey := ikey.GetWindow() + var w typex.Window + if len(wKey) == 0 { + w = window.GlobalWindow{} + } else { + w, err = exec.MakeWindowDecoder(coder.NewIntervalWindow()).DecodeSingle(bytes.NewBuffer(wKey)) + if err != nil { + panic(fmt.Sprintf("error decoding iterable side input window key %v: %v", wKey, err)) + } + } + winMap := b.IterableSideInputData[ikey.GetTransformId()][ikey.GetSideInputId()] + var wins []typex.Window + for w := range winMap { + wins = append(wins, w) + } + slog.Debug(fmt.Sprintf("side input[%v][%v] I Key: %v Windows: %v", req.GetId(), req.GetInstructionId(), w, wins)) + data = winMap[w] + + case *fnpb.StateKey_MultimapSideInput_: + mmkey := key.GetMultimapSideInput() + wKey := mmkey.GetWindow() + var w typex.Window + if len(wKey) == 0 { + w = window.GlobalWindow{} + } else { + w, err = exec.MakeWindowDecoder(coder.NewIntervalWindow()).DecodeSingle(bytes.NewBuffer(wKey)) + if err != nil { + panic(fmt.Sprintf("error decoding iterable side input window key %v: %v", wKey, err)) + } + } + dKey := mmkey.GetKey() + winMap := b.MultiMapSideInputData[mmkey.GetTransformId()][mmkey.GetSideInputId()] + var wins []typex.Window + for w := range winMap { + wins = append(wins, w) + } + slog.Debug(fmt.Sprintf("side input[%v][%v] MM Key: %v Windows: %v", req.GetId(), req.GetInstructionId(), w, wins)) + + data = winMap[w][string(dKey)] + + default: + panic(fmt.Sprintf("unsupported StateKey Access type: %T: %v", key.GetType(), prototext.Format(key))) + } + + // Encode the runner iterable (no length, just consecutive elements), and send it out. + // This is also where we can handle things like State Backed Iterables. + var buf bytes.Buffer + for _, value := range data { + buf.Write(value) + } + responses <- &fnpb.StateResponse{ + Id: req.GetId(), + Response: &fnpb.StateResponse_Get{ + Get: &fnpb.StateGetResponse{ + Data: buf.Bytes(), + }, + }, + } + default: + panic(fmt.Sprintf("unsupported StateRequest kind %T: %v", req.GetRequest(), prototext.Format(req))) + } + } + }() + for resp := range responses { + if err := state.Send(resp); err != nil { + slog.Error("state.Send error", err) + } + } + return nil +} + +// DataService is slated to be deleted in favour of stage based state +// management for side inputs. +type DataService struct { + // TODO actually quick process the data to windows here as well. + raw map[string][][]byte +} + +// Commit tentative data to the datastore. +func (d *DataService) Commit(tent engine.TentativeData) { + if d.raw == nil { + d.raw = map[string][][]byte{} + } + for colID, data := range tent.Raw { + d.raw[colID] = append(d.raw[colID], data...) + } +} + +// GetAllData is a hack for Side Inputs until watermarks are sorted out. +func (d *DataService) GetAllData(colID string) [][]byte { + return d.raw[colID] +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go new file mode 100644 index 000000000000..29b3fab92d64 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go @@ -0,0 +1,281 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package worker + +import ( + "bytes" + "context" + "net" + "sync" + "testing" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/test/bufconn" +) + +func TestWorker_New(t *testing.T) { + w := New("test") + if got, want := w.ID, "test"; got != want { + t.Errorf("New(%q) = %v, want %v", want, got, want) + } +} + +func TestWorker_NextInst(t *testing.T) { + w := New("test") + + instIDs := map[string]struct{}{} + for i := 0; i < 100; i++ { + instIDs[w.NextInst()] = struct{}{} + } + if got, want := len(instIDs), 100; got != want { + t.Errorf("calling w.NextInst() got %v unique ids, want %v", got, want) + } +} + +func TestWorker_NextStage(t *testing.T) { + w := New("test") + + stageIDs := map[string]struct{}{} + for i := 0; i < 100; i++ { + stageIDs[w.NextStage()] = struct{}{} + } + if got, want := len(stageIDs), 100; got != want { + t.Errorf("calling w.NextStage() got %v unique ids, want %v", got, want) + } +} + +func TestWorker_GetProcessBundleDescriptor(t *testing.T) { + w := New("test") + + id := "available" + w.Descriptors[id] = &fnpb.ProcessBundleDescriptor{ + Id: id, + } + + pbd, err := w.GetProcessBundleDescriptor(context.Background(), &fnpb.GetProcessBundleDescriptorRequest{ + ProcessBundleDescriptorId: id, + }) + if err != nil { + t.Errorf("got GetProcessBundleDescriptor(%q) error: %v, want nil", id, err) + } + if got, want := pbd.GetId(), id; got != want { + t.Errorf("got GetProcessBundleDescriptor(%q) = %v, want id %v", id, got, want) + } + + pbd, err = w.GetProcessBundleDescriptor(context.Background(), &fnpb.GetProcessBundleDescriptorRequest{ + ProcessBundleDescriptorId: "unknown", + }) + if err == nil { + t.Errorf("got GetProcessBundleDescriptor(%q) = %v, want error", "unknown", pbd) + } +} + +func serveTestWorker(t *testing.T) (context.Context, *W, *grpc.ClientConn) { + t.Helper() + ctx, cancelFn := context.WithCancel(context.Background()) + t.Cleanup(cancelFn) + + w := New("test") + lis := bufconn.Listen(2048) + w.lis = lis + t.Cleanup(func() { w.Stop() }) + go w.Serve() + + clientConn, err := grpc.DialContext(ctx, "", grpc.WithContextDialer(func(ctx context.Context, _ string) (net.Conn, error) { + return lis.DialContext(ctx) + }), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + t.Fatal("couldn't create bufconn grpc connection:", err) + } + return ctx, w, clientConn +} + +func TestWorker_Logging(t *testing.T) { + ctx, _, clientConn := serveTestWorker(t) + + logCli := fnpb.NewBeamFnLoggingClient(clientConn) + logStream, err := logCli.Logging(ctx) + if err != nil { + t.Fatal("couldn't create log 
client:", err) + } + + logStream.Send(&fnpb.LogEntry_List{ + LogEntries: []*fnpb.LogEntry{{ + Severity: fnpb.LogEntry_Severity_INFO, + Message: "squeamish ossiphrage", + }}, + }) + + // TODO: Connect to the job management service. + // At this point job messages are just logged to wherever the prism runner executes + // But this should pivot to anyone connecting to the Job Management service for the + // job. + // In the meantime, sleep to validate execution via coverage. + time.Sleep(20 * time.Millisecond) +} + +func TestWorker_Control_HappyPath(t *testing.T) { + ctx, wk, clientConn := serveTestWorker(t) + + ctrlCli := fnpb.NewBeamFnControlClient(clientConn) + ctrlStream, err := ctrlCli.Control(ctx) + if err != nil { + t.Fatal("couldn't create control client:", err) + } + + instID := wk.NextInst() + + b := &B{} + b.Init() + wk.bundles[instID] = b + b.ProcessOn(wk) + + ctrlStream.Send(&fnpb.InstructionResponse{ + InstructionId: instID, + Response: &fnpb.InstructionResponse_ProcessBundle{ + ProcessBundle: &fnpb.ProcessBundleResponse{ + RequiresFinalization: true, // Simple thing to check. + }, + }, + }) + + if err := ctrlStream.CloseSend(); err != nil { + t.Errorf("ctrlStream.CloseSend() = %v", err) + } + resp := <-b.Resp + + if !resp.RequiresFinalization { + t.Errorf("got %v, want response that Requires Finalization", resp) + } +} + +func TestWorker_Data_HappyPath(t *testing.T) { + ctx, wk, clientConn := serveTestWorker(t) + + dataCli := fnpb.NewBeamFnDataClient(clientConn) + dataStream, err := dataCli.Data(ctx) + if err != nil { + t.Fatal("couldn't create data client:", err) + } + + instID := wk.NextInst() + + b := &B{ + InstID: instID, + PBDID: wk.NextStage(), + InputData: [][]byte{ + {1, 1, 1, 1, 1, 1}, + }, + OutputCount: 1, + } + b.Init() + wk.bundles[instID] = b + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + b.ProcessOn(wk) + }() + + wk.InstReqs <- &fnpb.InstructionRequest{ + InstructionId: instID, + } + + elements, err := dataStream.Recv() + if err != nil { + t.Fatal("couldn't receive data elements:", err) + } + + if got, want := elements.GetData()[0].GetInstructionId(), b.InstID; got != want { + t.Fatalf("couldn't receive data elements ID: got %v, want %v", got, want) + } + if got, want := elements.GetData()[0].GetData(), []byte{1, 1, 1, 1, 1, 1}; !bytes.Equal(got, want) { + t.Fatalf("client Data received %v, want %v", got, want) + } + if got, want := elements.GetData()[0].GetIsLast(), true; got != want { + t.Fatalf("client Data received wasn't last: got %v, want %v", got, want) + } + + dataStream.Send(elements) + + if err := dataStream.CloseSend(); err != nil { + t.Errorf("ctrlStream.CloseSend() = %v", err) + } + + wg.Wait() + t.Log("ProcessOn successfully exited") +} + +func TestWorker_State_Iterable(t *testing.T) { + ctx, wk, clientConn := serveTestWorker(t) + + stateCli := fnpb.NewBeamFnStateClient(clientConn) + stateStream, err := stateCli.State(ctx) + if err != nil { + t.Fatal("couldn't create state client:", err) + } + + instID := wk.NextInst() + wk.bundles[instID] = &B{ + IterableSideInputData: map[string]map[string]map[typex.Window][][]byte{ + "transformID": { + "i1": { + window.GlobalWindow{}: [][]byte{ + {42}, + }, + }, + }, + }, + } + + stateStream.Send(&fnpb.StateRequest{ + Id: "first", + InstructionId: instID, + Request: &fnpb.StateRequest_Get{ + Get: &fnpb.StateGetRequest{}, + }, + StateKey: &fnpb.StateKey{Type: &fnpb.StateKey_IterableSideInput_{ + IterableSideInput: &fnpb.StateKey_IterableSideInput{ + TransformId: "transformID", + 
SideInputId: "i1", + Window: []byte{}, // Global Windows + }, + }}, + }) + + resp, err := stateStream.Recv() + if err != nil { + t.Fatal("couldn't receive state response:", err) + } + + if got, want := resp.GetId(), "first"; got != want { + t.Fatalf("didn't receive expected state response: got %v, want %v", got, want) + } + + if got, want := resp.GetGet().GetData(), []byte{42}; !bytes.Equal(got, want) { + t.Fatalf("didn't receive expected state response data: got %v, want %v", got, want) + } + + if err := stateStream.CloseSend(); err != nil { + t.Errorf("stateStream.CloseSend() = %v", err) + } +} diff --git a/sdks/go/pkg/beam/runners/prism/prism.go b/sdks/go/pkg/beam/runners/prism/prism.go new file mode 100644 index 000000000000..dc78e5e6c230 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/prism.go @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package prism contains a local runner for running +// pipelines in the current process. Useful for testing. +package prism + +import ( + "context" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" +) + +func init() { + beam.RegisterRunner("prism", Execute) + beam.RegisterRunner("PrismRunner", Execute) +} + +func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) { + if *jobopts.Endpoint == "" { + // One hasn't been selected, so lets start one up and set the address. + // Conveniently, this means that if multiple pipelines are executed against + // the local runner, they will all use the same server. + s := jobservices.NewServer(0, internal.RunPipeline) + *jobopts.Endpoint = s.Endpoint() + go s.Serve() + } + if !jobopts.IsLoopback() { + *jobopts.EnvironmentType = "loopback" + } + return universal.Execute(ctx, p) +} diff --git a/sdks/go/test/integration/primitives/checkpointing.go b/sdks/go/test/integration/primitives/checkpointing.go index ae61f3186294..7a349699dd4d 100644 --- a/sdks/go/test/integration/primitives/checkpointing.go +++ b/sdks/go/test/integration/primitives/checkpointing.go @@ -43,7 +43,7 @@ func (fn *selfCheckpointingDoFn) CreateInitialRestriction(_ []byte) offsetrange. } } -// CreateTracker wraps the fiven restriction into a LockRTracker type. +// CreateTracker wraps the given restriction into a LockRTracker type. 
func (fn *selfCheckpointingDoFn) CreateTracker(rest offsetrange.Restriction) *sdf.LockRTracker { return sdf.NewLockRTracker(offsetrange.NewTracker(rest)) } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java index c7b39d5b025a..8fa162ecf8e4 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java @@ -107,10 +107,15 @@ * org.apache.beam.sdk.transforms.GroupByKey} operations. * * @param the type of elements handled by this coder + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.coders.AvroCoder instead of this one. */ @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated public class AvroCoder extends CustomCoder { /** diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroGenericCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroGenericCoder.java index be726ccda1c1..7d90206ce4c5 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroGenericCoder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroGenericCoder.java @@ -20,7 +20,15 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; -/** AvroCoder specialisation for GenericRecord. */ +/** + * AvroCoder specialisation for GenericRecord. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.coders.AvroGenericCoder instead of this one. + */ +@Deprecated public class AvroGenericCoder extends AvroCoder { AvroGenericCoder(Schema schema) { super(GenericRecord.class, schema); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/DefaultCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/DefaultCoder.java index bf63103e84c2..f83072626554 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/DefaultCoder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/DefaultCoder.java @@ -74,7 +74,7 @@ public List getCoderProviders() { * A {@link CoderProvider} that uses the {@code @DefaultCoder} annotation to provide {@link * CoderProvider coder providers} that create {@link Coder}s. */ - static class DefaultCoderProvider extends CoderProvider { + public static class DefaultCoderProvider extends CoderProvider { private static final Logger LOG = LoggerFactory.getLogger(DefaultCoderProvider.class); /** diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java index e1a3c8a2912c..c8ccf7532296 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java @@ -328,10 +328,16 @@ * events.apply("WriteAvros", AvroIO.writeCustomTypeToGenericRecords() * .to(new UserDynamicAvroDestinations(userToSchemaMap))); * } + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. 
Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.AvroIO instead of this one. */ @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated public class AvroIO { /** * Reads records of the given type from an Avro file (or multiple Avro files matching a pattern). diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSchemaIOProvider.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSchemaIOProvider.java index 90519e386002..43498235992a 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSchemaIOProvider.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSchemaIOProvider.java @@ -43,12 +43,18 @@ /** * An implementation of {@link SchemaIOProvider} for reading and writing Avro files with {@link * AvroIO}. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.AvroSchemaIOProvider instead of this one. */ @Internal @AutoService(SchemaIOProvider.class) @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated public class AvroSchemaIOProvider implements SchemaIOProvider { /** Returns an id that uniquely represents this IO. */ @Override diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java index fe463d704a58..bc92113925cd 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java @@ -32,10 +32,18 @@ import org.apache.beam.sdk.util.MimeTypes; import org.checkerframework.checker.nullness.qual.Nullable; -/** A {@link FileBasedSink} for Avro files. */ +/** + * A {@link FileBasedSink} for Avro files. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.AvroSink instead of this one. + */ @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated public class AvroSink extends FileBasedSink { private final boolean genericRecords; diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java index 3b7824fa449e..22a5f1e0d24b 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java @@ -120,12 +120,17 @@ * details. * * @param The type of records to be read from the source. + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.AvroSource instead of this one. */ // CHECKSTYLE.ON: JavadocStyle @Experimental(Kind.SOURCE_SINK) @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated public class AvroSource extends BlockBasedSource { // Default minimum bundle size (chosen as two default-size Avro blocks to attempt to // ensure that every source has at least one block of records). 
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ConstantAvroDestination.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ConstantAvroDestination.java index 832526efa88f..cc9957474b9b 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ConstantAvroDestination.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ConstantAvroDestination.java @@ -31,10 +31,18 @@ import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding; import org.checkerframework.checker.nullness.qual.Nullable; -/** Always returns a constant {@link FilenamePolicy}, {@link Schema}, metadata, and codec. */ +/** + * Always returns a constant {@link FilenamePolicy}, {@link Schema}, metadata, and codec. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.ConstantAvroDestination instead of this one. + */ @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated class ConstantAvroDestination extends DynamicAvroDestinations { private static class SchemaFunction implements Serializable, Function { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicAvroDestinations.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicAvroDestinations.java index 4bb450bffe1c..40c8657a01ae 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicAvroDestinations.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicAvroDestinations.java @@ -28,7 +28,13 @@ * A specialization of {@link DynamicDestinations} for {@link AvroIO}. In addition to dynamic file * destinations, this allows specifying other AVRO properties (schema, metadata, codec, datum * writer) per destination. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.DynamicAvroDestinations instead of this one. */ +@Deprecated public abstract class DynamicAvroDestinations extends DynamicDestinations { /** Return an AVRO schema for a given destination. */ diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/SerializableAvroCodecFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/SerializableAvroCodecFactory.java index 5218eb683aac..b7b5da72ba48 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/SerializableAvroCodecFactory.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/SerializableAvroCodecFactory.java @@ -39,10 +39,17 @@ /** * A wrapper that allows {@link org.apache.avro.file.CodecFactory}s to be serialized using Java's * standard serialization mechanisms. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.io.SerializableAvroCodecFactory instead of this + * one. 
*/ @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) +@Deprecated class SerializableAvroCodecFactory implements Externalizable { private static final long serialVersionUID = 7445324844109564303L; private static final List noOptAvroCodecs = diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/AvroRecordSchema.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/AvroRecordSchema.java index c616901a396e..19027cd4527f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/AvroRecordSchema.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/AvroRecordSchema.java @@ -30,10 +30,16 @@ *

This provider infers a schema from generated SpecificRecord objects, and creates schemas and * rows that bind to the appropriate fields. This provider also infers schemas from Java POJO * objects, creating a schema that matches that inferred by the AVRO libraries. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.schemas.AvroRecordSchema instead of this one. */ @SuppressWarnings({ "rawtypes" // TODO(https://github.com/apache/beam/issues/20447) }) +@Deprecated public class AvroRecordSchema extends GetterBasedSchemaProvider { @Override public Schema schemaFor(TypeDescriptor typeDescriptor) { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/Providers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/Providers.java index dc0f758b4aba..c4a4902b9d6c 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/Providers.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/Providers.java @@ -17,7 +17,7 @@ */ package org.apache.beam.sdk.schemas.io; -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState; import java.util.HashMap; import java.util.Map; @@ -30,6 +30,7 @@ @Internal @Experimental(Kind.SCHEMAS) public final class Providers { + public interface Identifyable { /** * Returns an id that uniquely represents this among others implementing its derived interface. @@ -42,12 +43,27 @@ private Providers() {} public static Map loadProviders(Class klass) { Map providers = new HashMap<>(); for (T provider : ServiceLoader.load(klass)) { - checkArgument( - !providers.containsKey(provider.identifier()), - "Duplicate providers exist with identifier `%s` for class %s.", - provider.identifier(), - klass); - providers.put(provider.identifier(), provider); + // Avro provider is treated as a special case since two Avro providers may want to be loaded - + // from "core" (deprecated) and from "extensions/avro" (actual) - but only one must succeed. + // TODO: we won't need this check once all Avro providers from "core" will be + // removed + if (provider.identifier().equals("avro")) { + // Avro provider from "extensions/avro" must have a priority. + if (provider.getClass().getName().startsWith("org.apache.beam.sdk.extensions.avro")) { + // Load Avro provider from "extensions/avro" by any case. + providers.put(provider.identifier(), provider); + } else { + // Load Avro provider from "core" if it was not loaded from Avro extension before. 
+ providers.putIfAbsent(provider.identifier(), provider); + } + } else { + checkState( + !providers.containsKey(provider.identifier()), + "Duplicate providers exist with identifier `%s` for class %s.", + provider.identifier(), + klass); + providers.put(provider.identifier(), provider); + } } return providers; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/payloads/AvroPayloadSerializerProvider.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/payloads/AvroPayloadSerializerProvider.java index ace671fccd6f..5ffc549f7fbe 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/payloads/AvroPayloadSerializerProvider.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/io/payloads/AvroPayloadSerializerProvider.java @@ -25,8 +25,16 @@ import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.utils.AvroUtils; +/** + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.schemas.io.payloads.AvroPayloadSerializerProvider + * instead of this one. + */ @Internal @Experimental(Kind.SCHEMAS) +@Deprecated @AutoService(PayloadSerializerProvider.class) public class AvroPayloadSerializerProvider implements PayloadSerializerProvider { @Override diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java index eb01b83c94fc..fe4c76a2a26b 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroByteBuddyUtils.java @@ -48,11 +48,19 @@ import org.apache.beam.sdk.values.TypeDescriptor; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; +/** + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.schemas.utils.AvroByteBuddyUtils instead of this + * one. + */ @Experimental(Kind.SCHEMAS) @SuppressWarnings({ "nullness", // TODO(https://github.com/apache/beam/issues/20497) "rawtypes" }) +@Deprecated class AvroByteBuddyUtils { private static final ByteBuddy BYTE_BUDDY = new ByteBuddy(); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java index a45728236ab2..93362f4ceb6d 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/AvroUtils.java @@ -133,12 +133,18 @@ * * * is used. + * + * @deprecated Avro related classes are deprecated in module beam-sdks-java-core and + * will be eventually removed. Please, migrate to a new module + * beam-sdks-java-extensions-avro by importing + * org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils instead of this one. 
*/ @Experimental(Kind.SCHEMAS) @SuppressWarnings({ "nullness", // TODO(https://github.com/apache/beam/issues/20497) "rawtypes" }) +@Deprecated public class AvroUtils { static { // This works around a bug in the Avro library (AVRO-1891) around SpecificRecord's handling diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java index f3ecc95005bc..1bb5525af329 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java @@ -30,9 +30,13 @@ import static org.junit.Assume.assumeFalse; import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.nio.channels.Channels; import java.nio.channels.WritableByteChannel; import java.nio.file.Files; @@ -43,8 +47,9 @@ import java.util.List; import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.coders.CustomCoder; import org.apache.beam.sdk.coders.DefaultCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.io.FileBasedSink.WritableByteChannelFactory; @@ -209,7 +214,26 @@ private void testDynamicDestinations(boolean customType) throws Exception { DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE); } - @DefaultCoder(AvroCoder.class) + public static class UserWriteTypeCoder extends CustomCoder { + + @Override + public void encode(UserWriteType value, OutputStream outStream) + throws CoderException, IOException { + DataOutputStream stream = new DataOutputStream(outStream); + StringUtf8Coder.of().encode(value.destination, stream); + StringUtf8Coder.of().encode(value.metadata, stream); + } + + @Override + public UserWriteType decode(InputStream inStream) throws CoderException, IOException { + DataInputStream stream = new DataInputStream(inStream); + String dest = StringUtf8Coder.of().decode(stream); + String meta = StringUtf8Coder.of().decode(stream); + return new UserWriteType(dest, meta); + } + } + + @DefaultCoder(UserWriteTypeCoder.class) private static class UserWriteType { String destination; String metadata; @@ -279,6 +303,8 @@ public void testDynamicDefaultFilenamePolicy() throws Exception { new UserWriteType("baab", "fourth"), new UserWriteType("caaa", "fifth"), new UserWriteType("caab", "sixth")); + + p.getCoderRegistry().registerCoderForClass(UserWriteType.class, new UserWriteTypeCoder()); PCollection input = p.apply(Create.of(elements)); input.apply( TextIO.writeCustomType() diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/LatestTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/LatestTest.java index 179d1ef98a0a..20b04d88bdab 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/LatestTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/LatestTest.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertEquals; import java.io.Serializable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.BigEndianLongCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.KvCoder; @@ -108,8 +107,7 @@ public void 
testPerKeyEventTimestamp() { public void testPerKeyOutputCoder() { p.enableAbandonedNodeEnforcement(false); - KvCoder inputCoder = - KvCoder.of(AvroCoder.of(String.class), AvroCoder.of(Long.class)); + KvCoder inputCoder = KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()); PCollection> output = p.apply(Create.of(KV.of("foo", 1L)).withCoder(inputCoder)).apply(Latest.perKey()); diff --git a/sdks/java/extensions/avro/build.gradle b/sdks/java/extensions/avro/build.gradle index dae13cd99728..f73a9efccfb5 100644 --- a/sdks/java/extensions/avro/build.gradle +++ b/sdks/java/extensions/avro/build.gradle @@ -20,8 +20,8 @@ plugins { id 'org.apache.beam.module' } applyJavaNature( automaticModuleName: 'org.apache.beam.sdk.extensions.avro', disableLintWarnings: ['rawtypes'], // Avro-generated test code has raw-type errors - publish: false, - exportJavadoc: false, + publish: true, + exportJavadoc: true, ) applyAvroNature() diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/DefaultCoderTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/DefaultCoderTest.java new file mode 100644 index 000000000000..82991f191887 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/DefaultCoderTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.coders; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.instanceOf; + +import java.util.List; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.coders.DefaultCoder.DefaultCoderProviderRegistrar.DefaultCoderProvider; +import org.apache.beam.sdk.coders.ListCoder; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link DefaultCoder}. 
*/ +@RunWith(JUnit4.class) +public class DefaultCoderTest { + + @Rule public ExpectedException thrown = ExpectedException.none(); + + @DefaultCoder(AvroCoder.class) + private static class AvroRecord {} + + @Test + public void testCodersWithoutComponents() throws Exception { + CoderRegistry registry = CoderRegistry.createDefault(); + registry.registerCoderProvider(new DefaultCoderProvider()); + assertThat(registry.getCoder(AvroRecord.class), instanceOf(AvroCoder.class)); + } + + @Test + public void testDefaultCoderInCollection() throws Exception { + CoderRegistry registry = CoderRegistry.createDefault(); + registry.registerCoderProvider(new DefaultCoderProvider()); + Coder> avroRecordCoder = + registry.getCoder(new TypeDescriptor>() {}); + assertThat(avroRecordCoder, instanceOf(ListCoder.class)); + assertThat(((ListCoder) avroRecordCoder).getElemCoder(), instanceOf(AvroCoder.class)); + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/SchemaCoderTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/SchemaCoderTest.java new file mode 100644 index 000000000000..aa026d441801 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/SchemaCoderTest.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.schemas; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.fail; + +import java.time.Instant; +import java.util.Collection; +import java.util.Objects; +import java.util.function.Supplier; +import org.apache.avro.reflect.AvroSchema; +import org.apache.beam.sdk.coders.Coder.NonDeterministicException; +import org.apache.beam.sdk.coders.RowCoder; +import org.apache.beam.sdk.schemas.NoSuchSchemaException; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.Field; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.schemas.SchemaCoder; +import org.apache.beam.sdk.schemas.SchemaRegistry; +import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.beam.sdk.schemas.logicaltypes.NanosInstant; +import org.apache.beam.sdk.testing.CoderProperties; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.hamcrest.Matchers; +import org.joda.time.DateTime; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** Unit tests for {@link Schema}. 
*/ +@RunWith(Enclosed.class) +@SuppressWarnings({ + "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) +}) +public class SchemaCoderTest { + + public static final Schema LOGICAL_NANOS_SCHEMA = + Schema.of(Field.of("logicalNanos", FieldType.logicalType(new NanosInstant()))); + public static final Schema FLOATING_POINT_SCHEMA = + Schema.of(Field.of("float", FieldType.FLOAT), Field.of("double", FieldType.DOUBLE)); + + @DefaultSchema(AvroRecordSchema.class) + private static class SimpleAvro { + public String string; + public Integer int32; + public Long int64; + + @AvroSchema("{\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}") + public DateTime datetime; + + public SimpleAvro(String string, Integer int32, Long int64, DateTime datetime) { + this.string = string; + this.int32 = int32; + this.int64 = int64; + this.datetime = datetime; + } + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SimpleAvro that = (SimpleAvro) o; + return string.equals(that.string) + && int32.equals(that.int32) + && int64.equals(that.int64) + && datetime.equals(that.datetime); + } + + @Override + public int hashCode() { + return Objects.hash(string, int32, int64, datetime); + } + } + + private static final SchemaRegistry REGISTRY = SchemaRegistry.createDefault(); + + private static SchemaCoder coderFrom(TypeDescriptor typeDescriptor) throws NoSuchSchemaException { + return SchemaCoder.of( + REGISTRY.getSchema(typeDescriptor), + typeDescriptor, + REGISTRY.getToRowFunction(typeDescriptor), + REGISTRY.getFromRowFunction(typeDescriptor)); + } + + @RunWith(Parameterized.class) + public static class ParameterizedTests { + + @Parameterized.Parameter(0) + public SchemaCoder coder; + + @Parameterized.Parameter(1) + public ImmutableList> testValues; + + @Parameterized.Parameter(2) + public boolean expectDeterministic; + + @Parameterized.Parameters(name = "{index}: coder = {0}") + public static Collection data() throws NoSuchSchemaException { + return ImmutableList.of( + new Object[] { + coderFrom(TypeDescriptor.of(SimpleAvro.class)), + ImmutableList.>of( + () -> + new SimpleAvro( + "foo", + 9001, + 0L, + new DateTime().withDate(1979, 3, 14).withTime(10, 30, 0, 0)), + () -> + new SimpleAvro( + "bar", + 9002, + 1L, + new DateTime().withDate(1989, 3, 14).withTime(10, 30, 0, 0))), + true + }, + new Object[] { + RowCoder.of(LOGICAL_NANOS_SCHEMA), + ImmutableList.>of( + () -> + Row.withSchema(LOGICAL_NANOS_SCHEMA) + .withFieldValue("logicalNanos", Instant.ofEpochMilli(9001)) + .build()), + true + }, + new Object[] { + RowCoder.of(FLOATING_POINT_SCHEMA), + ImmutableList.>of( + () -> + Row.withSchema(FLOATING_POINT_SCHEMA) + .withFieldValue("float", (float) 1.0) + .withFieldValue("double", 2.0) + .build()), + false + }); + } + + @Test + public void coderSerializable() { + CoderProperties.coderSerializable(coder); + } + + @Test + public void coderConsistentWithEquals() throws Exception { + for (Supplier testValueA : testValues) { + for (Supplier testValueB : testValues) { + CoderProperties.coderConsistentWithEquals(coder, testValueA.get(), testValueB.get()); + } + } + } + + @Test + public void verifyDeterministic() throws Exception { + if (expectDeterministic) { + for (Supplier testValue : testValues) { + CoderProperties.coderDeterministic(coder, testValue.get(), testValue.get()); + } + } else { + assertNonDeterministic(coder); + } + } + } + + private static void assertNonDeterministic(SchemaCoder 
coder) { + try { + coder.verifyDeterministic(); + fail("Expected " + coder + " to be non-deterministic."); + } catch (NonDeterministicException e) { + assertThat(e.getReasons(), Matchers.iterableWithSize(1)); + } + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/transforms/ConvertTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/transforms/ConvertTest.java new file mode 100644 index 000000000000..a33fa013be17 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/transforms/ConvertTest.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.schemas.transforms; + +import java.util.Arrays; +import java.util.Map; +import java.util.Objects; +import org.apache.avro.generic.GenericRecord; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.schemas.JavaFieldSchema; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.beam.sdk.schemas.transforms.Convert; +import org.apache.beam.sdk.testing.NeedsRunner; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.UsesSchema; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for the {@link Convert} class. */ +@RunWith(JUnit4.class) +@Category(UsesSchema.class) +public class ConvertTest { + @Rule public final transient TestPipeline pipeline = TestPipeline.create(); + + /** Test outer POJO. 
* */ + @DefaultSchema(JavaFieldSchema.class) + public static class POJO1 { + public String field1 = "field1"; + public long field2 = 42; + public POJO1Nested field3 = new POJO1Nested(); + public POJO1Nested[] field4 = new POJO1Nested[] {new POJO1Nested(), new POJO1Nested()}; + public Map field5 = + ImmutableMap.of( + "first", new POJO1Nested(), + "second", new POJO1Nested()); + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + POJO1 pojo1 = (POJO1) o; + return field2 == pojo1.field2 + && Objects.equals(field1, pojo1.field1) + && Objects.equals(field3, pojo1.field3) + && Arrays.equals(field4, pojo1.field4) + && Objects.equals(field5, pojo1.field5); + } + + @Override + public int hashCode() { + int result = Objects.hash(field1, field2, field3, field5); + result = 31 * result + Arrays.hashCode(field4); + return result; + } + } + + /** Test inner POJO. * */ + @DefaultSchema(JavaFieldSchema.class) + public static class POJO1Nested { + public String yard1 = "yard2"; + public long yard2 = 43; + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + POJO1Nested that = (POJO1Nested) o; + return yard2 == that.yard2 && Objects.equals(yard1, that.yard1); + } + + @Override + public int hashCode() { + return Objects.hash(yard1, yard2); + } + } + + private static final Schema EXPECTED_SCHEMA1_NESTED = + Schema.builder().addStringField("yard1").addInt64Field("yard2").build(); + + private static final Schema EXPECTED_SCHEMA1 = + Schema.builder() + .addStringField("field1") + .addInt64Field("field2") + .addRowField("field3", EXPECTED_SCHEMA1_NESTED) + .addArrayField("field4", FieldType.row(EXPECTED_SCHEMA1_NESTED)) + .addMapField("field5", FieldType.STRING, FieldType.row(EXPECTED_SCHEMA1_NESTED)) + .build(); + + private static final Row EXPECTED_ROW1_NESTED = + Row.withSchema(EXPECTED_SCHEMA1_NESTED).addValues("yard2", 43L).build(); + + private static final Row EXPECTED_ROW1 = + Row.withSchema(EXPECTED_SCHEMA1) + .addValue("field1") + .addValue(42L) + .addValue(EXPECTED_ROW1_NESTED) + .addArray(ImmutableList.of(EXPECTED_ROW1_NESTED, EXPECTED_ROW1_NESTED)) + .addValue(ImmutableMap.of("first", EXPECTED_ROW1_NESTED, "second", EXPECTED_ROW1_NESTED)) + .build(); + + private static final GenericRecord EXPECTED_GENERICRECORD1 = + AvroUtils.toGenericRecord(EXPECTED_ROW1, AvroUtils.toAvroSchema(EXPECTED_SCHEMA1)); + + @Test + @Category(NeedsRunner.class) + public void testToGenericRecords() { + PCollection records = + pipeline.apply(Create.of(new POJO1())).apply(Convert.to(GenericRecord.class)); + PAssert.that(records).containsInAnyOrder(EXPECTED_GENERICRECORD1); + pipeline.run(); + } +} diff --git a/sdks/java/extensions/schemaio-expansion-service/src/main/java/org/apache/beam/sdk/extensions/schemaio/expansion/ExternalSchemaIOTransformRegistrar.java b/sdks/java/extensions/schemaio-expansion-service/src/main/java/org/apache/beam/sdk/extensions/schemaio/expansion/ExternalSchemaIOTransformRegistrar.java index f61a34532cbb..d8f797e8168b 100644 --- a/sdks/java/extensions/schemaio-expansion-service/src/main/java/org/apache/beam/sdk/extensions/schemaio/expansion/ExternalSchemaIOTransformRegistrar.java +++ b/sdks/java/extensions/schemaio-expansion-service/src/main/java/org/apache/beam/sdk/extensions/schemaio/expansion/ExternalSchemaIOTransformRegistrar.java @@ -17,10 +17,13 @@ */ package 
org.apache.beam.sdk.extensions.schemaio.expansion; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState; + import com.google.auto.service.AutoService; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.HashMap; import java.util.Map; import java.util.ServiceLoader; import javax.annotation.Nullable; @@ -50,22 +53,57 @@ public class ExternalSchemaIOTransformRegistrar implements ExternalTransformRegi @Override public Map> knownBuilderInstances() { - ImmutableMap.Builder builder = ImmutableMap.builder(); + Map> providers = new HashMap<>(); try { - for (SchemaIOProvider schemaIOProvider : ServiceLoader.load(SchemaIOProvider.class)) { - builder.put( - "beam:transform:org.apache.beam:schemaio_" + schemaIOProvider.identifier() + "_read:v1", - new ReaderBuilder(schemaIOProvider)); - builder.put( - "beam:transform:org.apache.beam:schemaio_" - + schemaIOProvider.identifier() - + "_write:v1", - new WriterBuilder(schemaIOProvider)); + for (SchemaIOProvider provider : ServiceLoader.load(SchemaIOProvider.class)) { + // Avro provider is treated as a special case since two Avro providers may want to be loaded + // from "core" (deprecated) and from "extensions/avro" (actual) - but only one must succeed. + // TODO: we won't need this check once all Avro providers from "core" will be + // removed + if (provider.identifier().equals("avro")) { + // Avro provider from "extensions/avro" must have a priority. + if (provider.getClass().getName().startsWith("org.apache.beam.sdk.extensions.avro")) { + // Load Avro provider from "extensions/avro" by any case. + registerProvider(providers, provider); + } else { + // Load Avro provider from "core" if it was not loaded from Avro extension before. 
+ registerProviderOptionally(providers, provider); + } + } else { + final String identifier = + "beam:transform:org.apache.beam:schemaio_" + provider.identifier() + "_read:v1"; + checkState( + !providers.containsKey(identifier), + "Duplicate providers exist with identifier `%s` for class %s.", + identifier, + SchemaIOProvider.class); + registerProvider(providers, provider); + } } } catch (Exception e) { throw new RuntimeException(e.getMessage()); } - return builder.build(); + return ImmutableMap.copyOf(providers); + } + + private void registerProvider( + Map> providers, SchemaIOProvider provider) { + providers.put( + "beam:transform:org.apache.beam:schemaio_" + provider.identifier() + "_read:v1", + new ReaderBuilder(provider)); + providers.put( + "beam:transform:org.apache.beam:schemaio_" + provider.identifier() + "_write:v1", + new WriterBuilder(provider)); + } + + private void registerProviderOptionally( + Map> providers, SchemaIOProvider provider) { + providers.putIfAbsent( + "beam:transform:org.apache.beam:schemaio_" + provider.identifier() + "_read:v1", + new ReaderBuilder(provider)); + providers.putIfAbsent( + "beam:transform:org.apache.beam:schemaio_" + provider.identifier() + "_write:v1", + new WriterBuilder(provider)); } public static class Configuration { diff --git a/sdks/java/extensions/sketching/build.gradle b/sdks/java/extensions/sketching/build.gradle index 1060804713d6..b239771ad274 100644 --- a/sdks/java/extensions/sketching/build.gradle +++ b/sdks/java/extensions/sketching/build.gradle @@ -32,6 +32,7 @@ dependencies { implementation "com.tdunning:t-digest:$tdigest_version" testImplementation library.java.avro testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation library.java.junit testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } diff --git a/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/ApproximateDistinctTest.java b/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/ApproximateDistinctTest.java index 2d9933955d31..0cb3e0e5116d 100644 --- a/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/ApproximateDistinctTest.java +++ b/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/ApproximateDistinctTest.java @@ -29,8 +29,8 @@ import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.sketching.ApproximateDistinct.ApproximateDistinctFn; import org.apache.beam.sdk.testing.CoderProperties; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/SketchFrequenciesTest.java b/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/SketchFrequenciesTest.java index 0f19f81ca890..2a37be8ae193 100644 --- a/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/SketchFrequenciesTest.java +++ b/sdks/java/extensions/sketching/src/test/java/org/apache/beam/sdk/extensions/sketching/SketchFrequenciesTest.java @@ -29,9 +29,9 @@ import 
org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.sketching.SketchFrequencies.CountMinSketchFn; import org.apache.beam.sdk.extensions.sketching.SketchFrequencies.Sketch; import org.apache.beam.sdk.testing.CoderProperties; diff --git a/sdks/java/extensions/sql/build.gradle b/sdks/java/extensions/sql/build.gradle index 4f9efec51789..de692d2d651e 100644 --- a/sdks/java/extensions/sql/build.gradle +++ b/sdks/java/extensions/sql/build.gradle @@ -74,6 +74,7 @@ dependencies { fmppTask "org.freemarker:freemarker:2.3.31" fmppTemplates library.java.vendored_calcite_1_28_0 implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:extensions:join-library") permitUnusedDeclared project(":sdks:java:extensions:join-library") // BEAM-11761 implementation project(":sdks:java:extensions:sql:udf") diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/avro/AvroTableProvider.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/avro/AvroTableProvider.java index 4df6a2b2795c..fb6855a3fb7c 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/avro/AvroTableProvider.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/avro/AvroTableProvider.java @@ -18,10 +18,10 @@ package org.apache.beam.sdk.extensions.sql.meta.provider.avro; import com.google.auto.service.AutoService; +import org.apache.beam.sdk.extensions.avro.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.io.AvroSchemaIOProvider; import org.apache.beam.sdk.extensions.sql.meta.provider.SchemaIOTableProviderWrapper; import org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider; -import org.apache.beam.sdk.io.AvroIO; -import org.apache.beam.sdk.io.AvroSchemaIOProvider; import org.apache.beam.sdk.schemas.io.SchemaIOProvider; /** diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/parquet/ParquetTable.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/parquet/ParquetTable.java index c38ed0ccf68e..88d162c02370 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/parquet/ParquetTable.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/parquet/ParquetTable.java @@ -25,6 +25,7 @@ import org.apache.avro.Schema.Field; import org.apache.avro.generic.GenericRecord; import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.sql.meta.BeamSqlTableFilter; import org.apache.beam.sdk.extensions.sql.meta.ProjectSupport; import org.apache.beam.sdk.extensions.sql.meta.SchemaBaseBeamTable; @@ -33,7 +34,6 @@ import org.apache.beam.sdk.io.parquet.ParquetIO; import org.apache.beam.sdk.io.parquet.ParquetIO.Read; import org.apache.beam.sdk.schemas.transforms.Convert; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; 
import org.apache.beam.sdk.values.PCollection.IsBounded; diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTableAvroTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTableAvroTest.java index e784865eab89..e468d5b9eef3 100644 --- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTableAvroTest.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/BeamKafkaTableAvroTest.java @@ -23,10 +23,10 @@ import java.util.List; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.sql.meta.Table; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/KafkaTableProviderIT.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/KafkaTableProviderIT.java index 21f0d4fb3e16..34b7efabb880 100644 --- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/KafkaTableProviderIT.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/kafka/KafkaTableProviderIT.java @@ -37,6 +37,7 @@ import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.RowCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.protobuf.PayloadMessages; import org.apache.beam.sdk.extensions.protobuf.ProtoMessageSchema; import org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv; @@ -52,7 +53,6 @@ import org.apache.beam.sdk.options.Validation; import org.apache.beam.sdk.schemas.RowMessages; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.state.BagState; import org.apache.beam.sdk.state.StateSpec; import org.apache.beam.sdk.state.StateSpecs; diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubTableProviderIT.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubTableProviderIT.java index 7bd872e7c510..2d79dcb0fff3 100644 --- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubTableProviderIT.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/pubsub/PubsubTableProviderIT.java @@ -49,7 +49,8 @@ import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; import org.apache.beam.sdk.extensions.protobuf.PayloadMessages; import 
org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv; @@ -66,7 +67,6 @@ import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.SchemaCoder; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.util.common.ReflectHelpers; import org.apache.beam.sdk.values.PCollection; diff --git a/sdks/java/harness/build.gradle b/sdks/java/harness/build.gradle index 813a32c46178..5ca0a8e2003a 100644 --- a/sdks/java/harness/build.gradle +++ b/sdks/java/harness/build.gradle @@ -53,6 +53,7 @@ applyJavaNature( "org/apache/beam/model/fnexecution/**", "org/apache/beam/runners/core/**", "org/apache/beam/runners/core/construction/**", + "org/apache/beam/sdk/extensions/avro/**", "org/apache/beam/sdk/fn/**", "org/checkerframework/**", "org/hamcrest/**", diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java index 0cfcb0a84f2a..561bb0f39fdf 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java @@ -2165,7 +2165,7 @@ public String getErrorContext() { } /** Provides arguments for a {@link DoFnInvoker} for a window observing method. */ - private class WindowObservingProcessBundleContext extends ProcessBundleContextBase { + private abstract class WindowObservingProcessBundleContextBase extends ProcessBundleContextBase { @Override public BoundedWindow window() { return currentWindow; @@ -2180,6 +2180,53 @@ public Object sideInput(String tagId) { public T sideInput(PCollectionView view) { return stateAccessor.get(view, currentWindow); } + } + + private class WindowObservingProcessBundleContext + extends WindowObservingProcessBundleContextBase { + + @Override + public void output(OutputT output) { + // Don't need to check timestamp since we can always output using the input timestamp. + outputTo( + mainOutputConsumer, + WindowedValue.of( + output, currentElement.getTimestamp(), currentWindow, currentElement.getPane())); + } + + @Override + public void output(TupleTag tag, T output) { + FnDataReceiver> consumer = + (FnDataReceiver) localNameToConsumer.get(tag.getId()); + if (consumer == null) { + throw new IllegalArgumentException(String.format("Unknown output tag %s", tag)); + } + // Don't need to check timestamp since we can always output using the input timestamp. + outputTo( + consumer, + WindowedValue.of( + output, currentElement.getTimestamp(), currentWindow, currentElement.getPane())); + } + + @Override + public void outputWithTimestamp(OutputT output, Instant timestamp) { + // TODO: Check that timestamp is valid once all runners can provide proper timestamps. + outputTo( + mainOutputConsumer, + WindowedValue.of(output, timestamp, currentWindow, currentElement.getPane())); + } + + @Override + public void outputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + // TODO: Check that timestamp is valid once all runners can provide proper timestamps. 
+ FnDataReceiver> consumer = + (FnDataReceiver) localNameToConsumer.get(tag.getId()); + if (consumer == null) { + throw new IllegalArgumentException(String.format("Unknown output tag %s", tag)); + } + outputTo( + consumer, WindowedValue.of(output, timestamp, currentWindow, currentElement.getPane())); + } @Override public State state(String stateId, boolean alwaysFetched) { @@ -2232,37 +2279,62 @@ public TimerMap timerFamily(String timerFamilyId) { currentElement.getTimestamp(), currentElement.getPane()); } - - @Override - public void outputWithTimestamp(OutputT output, Instant timestamp) { - // TODO: Check that timestamp is valid once all runners can provide proper timestamps. - outputTo( - mainOutputConsumer, - WindowedValue.of(output, timestamp, currentWindow, currentElement.getPane())); - } - - @Override - public void outputWithTimestamp(TupleTag tag, T output, Instant timestamp) { - // TODO: Check that timestamp is valid once all runners can provide proper timestamps. - FnDataReceiver> consumer = - (FnDataReceiver) localNameToConsumer.get(tag.getId()); - if (consumer == null) { - throw new IllegalArgumentException(String.format("Unknown output tag %s", tag)); - } - outputTo( - consumer, WindowedValue.of(output, timestamp, currentWindow, currentElement.getPane())); - } } /** This context outputs KV>, Size>. */ private class SizedRestrictionWindowObservingProcessBundleContext - extends WindowObservingProcessBundleContext { + extends WindowObservingProcessBundleContextBase { private final String errorContextPrefix; SizedRestrictionWindowObservingProcessBundleContext(String errorContextPrefix) { this.errorContextPrefix = errorContextPrefix; } + @Override + // OutputT == RestrictionT + public void output(OutputT output) { + double size = + doFnInvoker.invokeGetSize( + new DelegatingArgumentProvider( + this, this.errorContextPrefix + "/GetSize") { + @Override + public Object restriction() { + return output; + } + + @Override + public Instant timestamp(DoFn doFn) { + return currentElement.getTimestamp(); + } + + @Override + public RestrictionTracker restrictionTracker() { + return doFnInvoker.invokeNewTracker(this); + } + }); + + // Don't need to check timestamp since we can always output using the input timestamp. + outputTo( + mainOutputConsumer, + (WindowedValue) + WindowedValue.of( + KV.of( + KV.of( + currentElement.getValue(), KV.of(output, currentWatermarkEstimatorState)), + size), + currentElement.getTimestamp(), + currentWindow, + currentElement.getPane())); + } + + @Override + public void output(TupleTag tag, T output) { + // Note that the OutputReceiver/RowOutputReceiver specifically will use the non-tag versions + // of these methods when producing output. + throw new UnsupportedOperationException( + String.format("Non-main output %s unsupported in %s", tag, errorContextPrefix)); + } + @Override // OutputT == RestrictionT public void outputWithTimestamp(OutputT output, Instant timestamp) { @@ -2299,17 +2371,85 @@ public Instant timestamp(DoFn doFn) { currentWindow, currentElement.getPane())); } + + @Override + public void outputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + // Note that the OutputReceiver/RowOutputReceiver specifically will use the non-tag versions + // of these methods when producing output. 
+ throw new UnsupportedOperationException( + String.format("Non-main output %s unsupported in %s", tag, errorContextPrefix)); + } + + @Override + public State state(String stateId, boolean alwaysFetched) { + throw new UnsupportedOperationException( + String.format("State unsupported in %s", errorContextPrefix)); + } + + @Override + public org.apache.beam.sdk.state.Timer timer(String timerId) { + throw new UnsupportedOperationException( + String.format("Timer unsupported in %s", errorContextPrefix)); + } + + @Override + public TimerMap timerFamily(String tagId) { + throw new UnsupportedOperationException( + String.format("Timer unsupported in %s", errorContextPrefix)); + } } /** This context outputs KV>, Size>. */ private class SizedRestrictionNonWindowObservingProcessBundleContext - extends NonWindowObservingProcessBundleContext { + extends NonWindowObservingProcessBundleContextBase { private final String errorContextPrefix; SizedRestrictionNonWindowObservingProcessBundleContext(String errorContextPrefix) { this.errorContextPrefix = errorContextPrefix; } + @Override + // OutputT == RestrictionT + public void output(OutputT output) { + double size = + doFnInvoker.invokeGetSize( + new DelegatingArgumentProvider( + this, errorContextPrefix + "/GetSize") { + @Override + public Object restriction() { + return output; + } + + @Override + public Instant timestamp(DoFn doFn) { + return currentElement.getTimestamp(); + } + + @Override + public RestrictionTracker restrictionTracker() { + return doFnInvoker.invokeNewTracker(this); + } + }); + + // Don't need to check timestamp since we can always output using the input timestamp. + outputTo( + mainOutputConsumer, + (WindowedValue) + currentElement.withValue( + KV.of( + KV.of( + currentElement.getValue(), KV.of(output, currentWatermarkEstimatorState)), + size))); + } + + @Override + public void output(TupleTag tag, T output) { + // Note that the OutputReceiver/RowOutputReceiver specifically will use the non-tag versions + // of these methods when producing output. + throw new UnsupportedOperationException( + String.format("Non-main output %s unsupported in %s", tag, errorContextPrefix)); + } + @Override // OutputT == RestrictionT public void outputWithTimestamp(OutputT output, Instant timestamp) { @@ -2346,10 +2486,37 @@ public Instant timestamp(DoFn doFn) { currentElement.getWindows(), currentElement.getPane())); } + + @Override + public void outputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + // Note that the OutputReceiver/RowOutputReceiver specifically will use the non-tag versions + // of these methods when producing output. + throw new UnsupportedOperationException( + String.format("Non-main output %s unsupported in %s", tag, errorContextPrefix)); + } } /** Provides arguments for a {@link DoFnInvoker} for a non-window observing method. */ - private class NonWindowObservingProcessBundleContext extends ProcessBundleContextBase { + private class NonWindowObservingProcessBundleContext + extends NonWindowObservingProcessBundleContextBase { + + @Override + public void output(OutputT output) { + // Don't need to check timestamp since we can always output using the input timestamp. 
+ outputTo(mainOutputConsumer, currentElement.withValue(output)); + } + + @Override + public void output(TupleTag tag, T output) { + FnDataReceiver> consumer = + (FnDataReceiver) localNameToConsumer.get(tag.getId()); + if (consumer == null) { + throw new IllegalArgumentException(String.format("Unknown output tag %s", tag)); + } + // Don't need to check timestamp since we can always output using the input timestamp. + outputTo(consumer, currentElement.withValue(output)); + } + @Override public void outputWithTimestamp(OutputT output, Instant timestamp) { checkTimestamp(timestamp); @@ -2372,7 +2539,11 @@ public void outputWithTimestamp(TupleTag tag, T output, Instant timestamp WindowedValue.of( output, timestamp, currentElement.getWindows(), currentElement.getPane())); } + } + /** Provides base arguments for a {@link DoFnInvoker} for a non-window observing method. */ + private abstract class NonWindowObservingProcessBundleContextBase + extends ProcessBundleContextBase { @Override public BoundedWindow window() { throw new UnsupportedOperationException( @@ -2489,8 +2660,7 @@ public OutputReceiver outputReceiver(DoFn doFn) { @Override public void output(Row output) { - ProcessBundleContextBase.this.outputWithTimestamp( - fromRowFunction.apply(output), currentElement.getTimestamp()); + ProcessBundleContextBase.this.output(fromRowFunction.apply(output)); } @Override @@ -2517,14 +2687,16 @@ public OutputReceiver outputRowReceiver(DoFn doFn) { private final Map, OutputReceiver> taggedRowReceivers = new HashMap<>(); private OutputReceiver createTaggedOutputReceiver(TupleTag tag) { + // Note that it is important that we use the non-tag versions here when using the main + // output tag for performance reasons and we also rely on it for the splittable DoFn + // context objects as well. if (tag == null || mainOutputTag.equals(tag)) { return (OutputReceiver) ProcessBundleContextBase.this; } return new OutputReceiver() { @Override public void output(T output) { - ProcessBundleContextBase.this.outputWithTimestamp( - tag, output, currentElement.getTimestamp()); + ProcessBundleContextBase.this.output(tag, output); } @Override @@ -2535,6 +2707,9 @@ public void outputWithTimestamp(T output, Instant timestamp) { } private OutputReceiver createTaggedRowReceiver(TupleTag tag) { + // Note that it is important that we use the non-tag versions here when using the main + // output tag for performance reasons and we also rely on it for the splittable DoFn + // context objects as well. 
if (tag == null || mainOutputTag.equals(tag)) { checkState( mainOutputSchemaCoder != null, @@ -2555,8 +2730,7 @@ private OutputReceiver createTaggedRowReceiver(TupleTag tag) { @Override public void output(Row output) { - ProcessBundleContextBase.this.outputWithTimestamp( - tag, fromRowFunction.apply(output), currentElement.getTimestamp()); + ProcessBundleContextBase.this.output(tag, fromRowFunction.apply(output)); } @Override @@ -2615,16 +2789,6 @@ public PipelineOptions pipelineOptions() { return pipelineOptions; } - @Override - public void output(OutputT output) { - outputWithTimestamp(output, currentElement.getTimestamp()); - } - - @Override - public void output(TupleTag tag, T output) { - outputWithTimestamp(tag, output, currentElement.getTimestamp()); - } - @Override public InputT element() { return currentElement.getValue(); @@ -2777,8 +2941,7 @@ public OutputReceiver outputReceiver(DoFn doFn) { @Override public void output(Row output) { - context.outputWithTimestamp( - fromRowFunction.apply(output), currentElement.getTimestamp()); + context.output(fromRowFunction.apply(output)); } @Override @@ -2810,7 +2973,7 @@ private OutputReceiver createTaggedOutputReceiver(TupleTag tag) { return new OutputReceiver() { @Override public void output(T output) { - context.outputWithTimestamp(tag, output, currentElement.getTimestamp()); + context.output(tag, output); } @Override @@ -2841,8 +3004,7 @@ private OutputReceiver createTaggedRowReceiver(TupleTag tag) { @Override public void output(Row output) { - context.outputWithTimestamp( - tag, fromRowFunction.apply(output), currentElement.getTimestamp()); + context.output(tag, fromRowFunction.apply(output)); } @Override @@ -3071,7 +3233,7 @@ private OutputReceiver createTaggedOutputReceiver(TupleTag tag) { return new OutputReceiver() { @Override public void output(T output) { - context.outputWithTimestamp(tag, output, currentElement.getTimestamp()); + context.output(tag, output); } @Override @@ -3102,8 +3264,7 @@ private OutputReceiver createTaggedRowReceiver(TupleTag tag) { @Override public void output(Row output) { - context.outputWithTimestamp( - tag, fromRowFunction.apply(output), currentElement.getTimestamp()); + context.output(tag, fromRowFunction.apply(output)); } @Override diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java index 4c91874e63f6..ce78f33c14cb 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java @@ -29,6 +29,7 @@ import org.apache.beam.fn.harness.control.HarnessMonitoringInfosInstructionHandler; import org.apache.beam.fn.harness.control.ProcessBundleHandler; import org.apache.beam.fn.harness.data.BeamFnDataGrpcClient; +import org.apache.beam.fn.harness.debug.DataSampler; import org.apache.beam.fn.harness.logging.BeamFnLoggingClient; import org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache; import org.apache.beam.fn.harness.status.BeamFnStatusClient; @@ -89,6 +90,7 @@ public class FnHarness { private static final String STATUS_API_SERVICE_DESCRIPTOR = "STATUS_API_SERVICE_DESCRIPTOR"; private static final String PIPELINE_OPTIONS = "PIPELINE_OPTIONS"; private static final String RUNNER_CAPABILITIES = "RUNNER_CAPABILITIES"; + private static final String ENABLE_DATA_SAMPLING_EXPERIMENT = "enable_data_sampling"; private static final Logger LOG = LoggerFactory.getLogger(FnHarness.class); private static 
Endpoints.ApiServiceDescriptor getApiServiceDescriptor(String descriptor) @@ -221,6 +223,7 @@ public static void main( options.as(ExecutorOptions.class).getScheduledExecutorService(); ExecutionStateSampler executionStateSampler = new ExecutionStateSampler(options, System::currentTimeMillis); + final DataSampler dataSampler = new DataSampler(); // The logging client variable is not used per se, but during its lifetime (until close()) it // intercepts logging and sends it to the logging service. @@ -248,6 +251,12 @@ public static void main( FinalizeBundleHandler finalizeBundleHandler = new FinalizeBundleHandler(executorService); + // Create the sampler, if the experiment is enabled. + boolean shouldSample = + ExperimentalOptions.hasExperiment(options, ENABLE_DATA_SAMPLING_EXPERIMENT); + + // Retrieves the ProcessBundleDescriptor from cache. Requests the PBD from the Runner if it + // doesn't exist. Additionally, runs any graph modifications. Function getProcessBundleDescriptor = new Function() { private static final String PROCESS_BUNDLE_DESCRIPTORS = "ProcessBundleDescriptors"; @@ -279,7 +288,8 @@ private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) { finalizeBundleHandler, metricsShortIds, executionStateSampler, - processWideCache); + processWideCache, + shouldSample ? dataSampler : null); logging.setProcessBundleHandler(processBundleHandler); BeamFnStatusClient beamFnStatusClient = null; @@ -327,6 +337,8 @@ private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) { handlers.put( InstructionRequest.RequestCase.HARNESS_MONITORING_INFOS, processWideHandler::harnessMonitoringInfos); + handlers.put( + InstructionRequest.RequestCase.SAMPLE_DATA, dataSampler::handleDataSampleRequest); JvmInitializers.runBeforeProcessing(options); diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java index 560369a3907a..348b9a761fdf 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java @@ -52,6 +52,7 @@ import org.apache.beam.fn.harness.data.BeamFnDataClient; import org.apache.beam.fn.harness.data.PCollectionConsumerRegistry; import org.apache.beam.fn.harness.data.PTransformFunctionRegistry; +import org.apache.beam.fn.harness.debug.DataSampler; import org.apache.beam.fn.harness.state.BeamFnStateClient; import org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache; import org.apache.beam.model.fnexecution.v1.BeamFnApi; @@ -164,6 +165,7 @@ public class ProcessBundleHandler { private final Cache processWideCache; @VisibleForTesting final BundleProcessorCache bundleProcessorCache; private final Set runnerCapabilities; + private final @Nullable DataSampler dataSampler; public ProcessBundleHandler( PipelineOptions options, @@ -174,7 +176,8 @@ public ProcessBundleHandler( FinalizeBundleHandler finalizeBundleHandler, ShortIdMap shortIds, ExecutionStateSampler executionStateSampler, - Cache processWideCache) { + Cache processWideCache, + @Nullable DataSampler dataSampler) { this( options, runnerCapabilities, @@ -186,7 +189,8 @@ public ProcessBundleHandler( executionStateSampler, REGISTERED_RUNNER_FACTORIES, processWideCache, - new BundleProcessorCache()); + new BundleProcessorCache(), + dataSampler); } @VisibleForTesting @@ -201,7 +205,8 @@ public ProcessBundleHandler( ExecutionStateSampler 
executionStateSampler, Map urnToPTransformRunnerFactoryMap, Cache processWideCache, - BundleProcessorCache bundleProcessorCache) { + BundleProcessorCache bundleProcessorCache, + @Nullable DataSampler dataSampler) { this.options = options; this.fnApiRegistry = fnApiRegistry; this.beamFnDataClient = beamFnDataClient; @@ -218,6 +223,7 @@ public ProcessBundleHandler( new UnknownPTransformRunnerFactory(urnToPTransformRunnerFactoryMap.keySet()); this.processWideCache = processWideCache; this.bundleProcessorCache = bundleProcessorCache; + this.dataSampler = dataSampler; } private void createRunnerAndConsumersForPTransformRecursively( @@ -771,7 +777,11 @@ private BundleProcessor createBundleProcessor( bundleProgressReporterAndRegistrar.register(stateTracker); PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry( - stateTracker, shortIds, bundleProgressReporterAndRegistrar, bundleDescriptor); + stateTracker, + shortIds, + bundleProgressReporterAndRegistrar, + bundleDescriptor, + dataSampler); HashSet processedPTransformIds = new HashSet<>(); PTransformFunctionRegistry startFunctionRegistry = diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java index 45298a68d98c..5095be1be8fb 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Random; +import javax.annotation.Nullable; import org.apache.beam.fn.harness.HandlesSplits; import org.apache.beam.fn.harness.control.BundleProgressReporter; import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionState; @@ -31,6 +32,8 @@ import org.apache.beam.fn.harness.control.Metrics; import org.apache.beam.fn.harness.control.Metrics.BundleCounter; import org.apache.beam.fn.harness.control.Metrics.BundleDistribution; +import org.apache.beam.fn.harness.debug.DataSampler; +import org.apache.beam.fn.harness.debug.OutputSampler; import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor; import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo; import org.apache.beam.model.pipeline.v1.RunnerApi; @@ -48,7 +51,6 @@ import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder; import org.apache.beam.sdk.util.common.ElementByteSizeObserver; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; /** * The {@code PCollectionConsumerRegistry} is used to maintain a collection of consuming @@ -87,12 +89,22 @@ public static ConsumerAndMetadata forConsumer( private final BundleProgressReporter.Registrar bundleProgressReporterRegistrar; private final ProcessBundleDescriptor processBundleDescriptor; private final RehydratedComponents rehydratedComponents; + private final @Nullable DataSampler dataSampler; public PCollectionConsumerRegistry( ExecutionStateTracker stateTracker, ShortIdMap shortIdMap, BundleProgressReporter.Registrar bundleProgressReporterRegistrar, ProcessBundleDescriptor processBundleDescriptor) { + this(stateTracker, shortIdMap, bundleProgressReporterRegistrar, processBundleDescriptor, null); + } + + public PCollectionConsumerRegistry( + ExecutionStateTracker stateTracker, + ShortIdMap shortIdMap, + 
BundleProgressReporter.Registrar bundleProgressReporterRegistrar, + ProcessBundleDescriptor processBundleDescriptor, + @Nullable DataSampler dataSampler) { this.stateTracker = stateTracker; this.shortIdMap = shortIdMap; this.pCollectionIdsToConsumers = new HashMap<>(); @@ -106,6 +118,7 @@ public PCollectionConsumerRegistry( .putAllPcollections(processBundleDescriptor.getPcollectionsMap()) .putAllWindowingStrategies(processBundleDescriptor.getWindowingStrategiesMap()) .build()); + this.dataSampler = dataSampler; } /** @@ -201,16 +214,17 @@ public FnDataReceiver> getMultiplexingConsumer(String pCollecti if (consumerAndMetadatas.size() == 1) { ConsumerAndMetadata consumerAndMetadata = consumerAndMetadatas.get(0); if (consumerAndMetadata.getConsumer() instanceof HandlesSplits) { - return new SplittingMetricTrackingFnDataReceiver(pcId, coder, consumerAndMetadata); + return new SplittingMetricTrackingFnDataReceiver( + pcId, coder, consumerAndMetadata, dataSampler); } - return new MetricTrackingFnDataReceiver(pcId, coder, consumerAndMetadata); + return new MetricTrackingFnDataReceiver(pcId, coder, consumerAndMetadata, dataSampler); } else { /* TODO(SDF), Consider supporting splitting each consumer individually. This would never come up in the existing SDF expansion, but might be useful to support fused SDF nodes. This would require dedicated delivery of the split results to each of the consumers separately. */ return new MultiplexingMetricTrackingFnDataReceiver( - pcId, coder, ImmutableList.copyOf(consumerAndMetadatas)); + pcId, coder, consumerAndMetadatas, dataSampler); } }); } @@ -228,9 +242,13 @@ private class MetricTrackingFnDataReceiver implements FnDataReceiver sampledByteSizeDistribution; private final Coder coder; + private final @Nullable OutputSampler outputSampler; public MetricTrackingFnDataReceiver( - String pCollectionId, Coder coder, ConsumerAndMetadata consumerAndMetadata) { + String pCollectionId, + Coder coder, + ConsumerAndMetadata consumerAndMetadata, + @Nullable DataSampler dataSampler) { this.delegate = consumerAndMetadata.getConsumer(); this.executionState = consumerAndMetadata.getExecutionState(); @@ -266,6 +284,11 @@ public MetricTrackingFnDataReceiver( bundleProgressReporterRegistrar.register(sampledByteSizeUnderlyingDistribution); this.coder = coder; + if (dataSampler == null) { + this.outputSampler = null; + } else { + this.outputSampler = dataSampler.sampleOutput(pCollectionId, coder); + } } @Override @@ -276,6 +299,10 @@ public void accept(WindowedValue input) throws Exception { // we have window optimization. this.sampledByteSizeDistribution.tryUpdate(input.getValue(), this.coder); + if (outputSampler != null) { + outputSampler.sample(input.getValue()); + } + // Use the ExecutionStateTracker and enter an appropriate state to track the // Process Bundle Execution time metric and also ensure user counters can get an appropriate // metrics container. 
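// A minimal sketch of the sampling flow wired up here, mirroring DataSamplerTest further below.
// The DataSamplingSketch class name is made up for illustration only. One DataSampler is shared
// per SDK harness, each PCollection id gets one OutputSampler, and the runner pulls samples back
// with a SampleDataRequest. FnHarness only passes a DataSampler through when the
// "enable_data_sampling" experiment is set (typically via --experiments=enable_data_sampling).
package org.apache.beam.fn.harness.debug;

import org.apache.beam.model.fnexecution.v1.BeamFnApi;
import org.apache.beam.sdk.coders.StringUtf8Coder;

class DataSamplingSketch {
  static BeamFnApi.InstructionResponse sampleAndQuery() {
    // Keep at most 10 samples per PCollection, taking every 1000th element after the first 10.
    DataSampler dataSampler = new DataSampler(10, 1000);

    // Keyed by PCollection id via computeIfAbsent: repeated calls reuse the same OutputSampler.
    OutputSampler<String> sampler =
        dataSampler.sampleOutput("pcollection-id", StringUtf8Coder.of());
    sampler.sample("element");

    // The SAMPLE_DATA instruction returns the buffered samples, encoded with the PCollection's
    // coder; an empty pcollection_ids filter means "all PCollections".
    BeamFnApi.InstructionRequest request =
        BeamFnApi.InstructionRequest.newBuilder()
            .setSampleData(BeamFnApi.SampleDataRequest.newBuilder())
            .build();
    return dataSampler.handleDataSampleRequest(request).build();
  }
}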
@@ -302,9 +329,13 @@ private class MultiplexingMetricTrackingFnDataReceiver private final BundleCounter elementCountCounter; private final SampleByteSizeDistribution sampledByteSizeDistribution; private final Coder coder; + private final @Nullable OutputSampler outputSampler; public MultiplexingMetricTrackingFnDataReceiver( - String pCollectionId, Coder coder, List consumerAndMetadatas) { + String pCollectionId, + Coder coder, + List consumerAndMetadatas, + @Nullable DataSampler dataSampler) { this.consumerAndMetadatas = consumerAndMetadatas; HashMap labels = new HashMap<>(); @@ -339,6 +370,11 @@ public MultiplexingMetricTrackingFnDataReceiver( bundleProgressReporterRegistrar.register(sampledByteSizeUnderlyingDistribution); this.coder = coder; + if (dataSampler == null) { + this.outputSampler = null; + } else { + this.outputSampler = dataSampler.sampleOutput(pCollectionId, coder); + } } @Override @@ -349,10 +385,16 @@ public void accept(WindowedValue input) throws Exception { // when we have window optimization. this.sampledByteSizeDistribution.tryUpdate(input.getValue(), coder); + if (outputSampler != null) { + outputSampler.sample(input.getValue()); + } + // Use the ExecutionStateTracker and enter an appropriate state to track the // Process Bundle Execution time metric and also ensure user counters can get an appropriate - // metrics container. - for (ConsumerAndMetadata consumerAndMetadata : consumerAndMetadatas) { + // metrics container. We specifically don't use a for-each loop since it creates an iterator + // on a hot path. + for (int size = consumerAndMetadatas.size(), i = 0; i < size; ++i) { + ConsumerAndMetadata consumerAndMetadata = consumerAndMetadatas.get(i); ExecutionState state = consumerAndMetadata.getExecutionState(); state.activate(); try { @@ -377,8 +419,11 @@ private class SplittingMetricTrackingFnDataReceiver extends MetricTrackingFnD private final HandlesSplits delegate; public SplittingMetricTrackingFnDataReceiver( - String pCollection, Coder coder, ConsumerAndMetadata consumerAndMetadata) { - super(pCollection, coder, consumerAndMetadata); + String pCollection, + Coder coder, + ConsumerAndMetadata consumerAndMetadata, + @Nullable DataSampler dataSampler) { + super(pCollection, coder, consumerAndMetadata, dataSampler); this.delegate = (HandlesSplits) consumerAndMetadata.getConsumer(); } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java new file mode 100644 index 000000000000..2a13b5dac3d3 --- /dev/null +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.fn.harness.debug; + +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.model.fnexecution.v1.BeamFnApi.SampleDataResponse.ElementList; +import org.apache.beam.sdk.coders.Coder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The DataSampler is a global (per SDK Harness) object that facilitates taking and returning + * samples to the Runner Harness. The class is thread-safe with respect to executing + * ProcessBundleDescriptors. Meaning, different threads executing different PBDs can sample + * simultaneously, even if computing the same logical PCollection. + */ +public class DataSampler { + private static final Logger LOG = LoggerFactory.getLogger(DataSampler.class); + + /** + * Creates a DataSampler to sample every 1000 elements while keeping a maximum of 10 in memory. + */ + public DataSampler() { + this(10, 1000); + } + + /** + * @param maxSamples Sets the maximum number of samples held in memory at once. + * @param sampleEveryN Sets how often to sample. + */ + public DataSampler(int maxSamples, int sampleEveryN) { + checkArgument( + maxSamples > 0, + "Expected positive number of samples, did you mean to disable data sampling?"); + checkArgument( + sampleEveryN > 0, + "Expected positive number for sampling period, did you mean to disable data sampling?"); + this.maxSamples = maxSamples; + this.sampleEveryN = sampleEveryN; + } + + // Maximum number of elements in buffer. + private final int maxSamples; + + // Sampling rate. + private final int sampleEveryN; + + // The fully-qualified type is: Map[PCollectionId, OutputSampler]. In order to sample + // on a PCollection-basis and not per-bundle, this keeps track of shared samples between states. + private final Map> outputSamplers = new ConcurrentHashMap<>(); + + /** + * Creates and returns a class to sample the given PCollection in the given + * ProcessBundleDescriptor. Uses the given coder encode samples as bytes when responding to a + * SampleDataRequest. + * + *
<p>
Invoked by multiple bundle processing threads in parallel when a new bundle processor is + * being instantiated. + * + * @param pcollectionId The PCollection to take intermittent samples from. + * @param coder The coder associated with the PCollection. Coder may be from a nested context. + * @param The type of element contained in the PCollection. + * @return the OutputSampler corresponding to the unique PBD and PCollection. + */ + public OutputSampler sampleOutput(String pcollectionId, Coder coder) { + return (OutputSampler) + outputSamplers.computeIfAbsent( + pcollectionId, k -> new OutputSampler<>(coder, this.maxSamples, this.sampleEveryN)); + } + + /** + * Returns all collected samples. Thread-safe. + * + * @param request The instruction request from the FnApi. Filters based on the given + * SampleDataRequest. + * @return Returns all collected samples. + */ + public synchronized BeamFnApi.InstructionResponse.Builder handleDataSampleRequest( + BeamFnApi.InstructionRequest request) { + BeamFnApi.SampleDataRequest sampleDataRequest = request.getSampleData(); + + List pcollections = sampleDataRequest.getPcollectionIdsList(); + + // Safe to iterate as the ConcurrentHashMap will return each element at most once and will not + // throw ConcurrentModificationException. + BeamFnApi.SampleDataResponse.Builder response = BeamFnApi.SampleDataResponse.newBuilder(); + outputSamplers.forEach( + (pcollectionId, outputSampler) -> { + if (!pcollections.isEmpty() && !pcollections.contains(pcollectionId)) { + return; + } + + try { + response.putElementSamples( + pcollectionId, + ElementList.newBuilder().addAllElements(outputSampler.samples()).build()); + } catch (IOException e) { + LOG.warn("Could not encode elements from \"" + pcollectionId + "\" to bytes: " + e); + } + }); + + return BeamFnApi.InstructionResponse.newBuilder().setSampleData(response); + } +} diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java new file mode 100644 index 000000000000..326f2dbfe8f3 --- /dev/null +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.fn.harness.debug; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.util.ByteStringOutputStream; + +/** + * This class holds samples for a single PCollection until queried by the parent DataSampler. 
This + * class is meant to hold only a limited number of elements in memory. So old values are constantly + * being overridden in a circular buffer. + * + * @param the element type of the PCollection. + */ +public class OutputSampler { + + // Temporarily holds elements until the SDK receives a sample data request. + private List buffer; + + // Maximum number of elements in buffer. + private final int maxElements; + + // Sampling rate. + private final int sampleEveryN; + + // Total number of samples taken. + private final AtomicLong numSamples = new AtomicLong(); + + // Index into the buffer of where to overwrite samples. + private int resampleIndex = 0; + + private final Coder coder; + + public OutputSampler(Coder coder, int maxElements, int sampleEveryN) { + this.coder = coder; + this.maxElements = maxElements; + this.sampleEveryN = sampleEveryN; + this.buffer = new ArrayList<>(this.maxElements); + } + + /** + * Samples every {@code sampleEveryN}th element or if it is part of the first 10 in the (local) + * PCollection. + * + *
<p>
This method is invoked in parallel by multiple bundle processing threads and in parallel to + * any {@link #samples} being returned to a thread handling a sample request. + * + * @param element the element to sample. + */ + public void sample(T element) { + // Only sample the first 10 elements then after every `sampleEveryN`th element. + long samples = numSamples.get() + 1; + + // This has eventual consistency. If there are many threads lazy setting, this will be set to + // the slowest thread accessing the atomic. But over time, it will still increase. This is ok + // because this is a debugging feature and doesn't need strict atomics. + numSamples.lazySet(samples); + if (samples > 10 && samples % sampleEveryN != 0) { + return; + } + + synchronized (this) { + // Fill buffer until maxElements. + if (buffer.size() < maxElements) { + buffer.add(element); + } else { + // Then rewrite sampled elements as a circular buffer. + buffer.set(resampleIndex, element); + resampleIndex = (resampleIndex + 1) % maxElements; + } + } + } + + /** + * Clears samples at end of call. This is to help mitigate memory use. + * + *
<p>
This method is invoked by a thread handling a data sampling request in parallel to any calls + * to {@link #sample}. + * + * @return samples taken since last call. + */ + public List samples() throws IOException { + List ret = new ArrayList<>(); + + // Serializing can take a lot of CPU time for larger or complex elements. Copy the array here + // so as to not slow down the main processing hot path. + List bufferToSend; + int sampleIndex = 0; + synchronized (this) { + bufferToSend = buffer; + sampleIndex = resampleIndex; + buffer = new ArrayList<>(maxElements); + resampleIndex = 0; + } + + ByteStringOutputStream stream = new ByteStringOutputStream(); + for (int i = 0; i < bufferToSend.size(); i++) { + int index = (sampleIndex + i) % bufferToSend.size(); + // This is deprecated, but until this is fully removed, this specifically needs the nested + // context. This is because the SDK will need to decode the sampled elements with the + // ToStringFn. + coder.encode(bufferToSend.get(index), stream, Coder.Context.NESTED); + ret.add( + BeamFnApi.SampledElement.newBuilder().setElement(stream.toByteStringAndReset()).build()); + } + + return ret; + } +} diff --git a/.test-infra/jenkins/job_PreCommit_Java_Jms_IO_Direct.groovy b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/package-info.java similarity index 59% rename from .test-infra/jenkins/job_PreCommit_Java_Jms_IO_Direct.groovy rename to sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/package-info.java index 77931f346ecf..978bcd346d47 100644 --- a/.test-infra/jenkins/job_PreCommit_Java_Jms_IO_Direct.groovy +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/package-info.java @@ -16,26 +16,5 @@ * limitations under the License. */ -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Java_Jms_IO_Direct', - gradleTasks: [ - ':sdks:java:io:jms:build', - ], - gradleSwitches: [ - '-PdisableSpotlessCheck=true', - '-PdisableCheckStyle=true' - ], // spotless checked in separate pre-commit - triggerPathPatterns: [ - '^sdks/java/core/src/main/.*$', - '^sdks/java/io/jms/.*$', - ], - timeoutMins: 60, - ) -builder.build { - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } -} +/** Classes and utilities related to debugging features. 
*/ +package org.apache.beam.fn.harness.debug; diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java index 7df9ed2f894d..52bb72f97894 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java @@ -376,7 +376,8 @@ public void testTrySplitBeforeBundleDoesNotFail() { executionStateSampler, ImmutableMap.of(), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); BeamFnApi.InstructionResponse response = handler @@ -406,7 +407,8 @@ public void testProgressBeforeBundleDoesNotFail() throws Exception { executionStateSampler, ImmutableMap.of(), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); handler.progress( BeamFnApi.InstructionRequest.newBuilder() @@ -485,7 +487,8 @@ public void testOrderOfStartAndFinishCalls() throws Exception { DATA_INPUT_URN, startFinishRecorder, DATA_OUTPUT_URN, startFinishRecorder), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); handler.processBundle( BeamFnApi.InstructionRequest.newBuilder() @@ -589,7 +592,8 @@ public void testOrderOfSetupTeardownCalls() throws Exception { executionStateSampler, urnToPTransformRunnerFactoryMap, Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); handler.processBundle( BeamFnApi.InstructionRequest.newBuilder() @@ -640,7 +644,8 @@ public void testBundleProcessorIsResetWhenAddedBackToCache() throws Exception { executionStateSampler, ImmutableMap.of(DATA_INPUT_URN, (context) -> null), Caches.noop(), - new TestBundleProcessorCache()); + new TestBundleProcessorCache(), + null /* dataSampler */); assertThat(TestBundleProcessor.resetCnt, equalTo(0)); @@ -806,7 +811,8 @@ public void testCreatingPTransformExceptionsArePropagated() throws Exception { throw new IllegalStateException("TestException"); }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); assertThrows( "TestException", IllegalStateException.class, @@ -856,7 +862,8 @@ public void testBundleFinalizationIsPropagated() throws Exception { return null; }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); BeamFnApi.InstructionResponse.Builder response = handler.processBundle( BeamFnApi.InstructionRequest.newBuilder() @@ -909,7 +916,8 @@ public void testPTransformStartExceptionsArePropagated() { return null; }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); assertThrows( "TestException", IllegalStateException.class, @@ -1086,7 +1094,8 @@ public void onCompleted() {} executionStateSampler, urnToPTransformRunnerFactoryMap, Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); } @Test @@ -1418,7 +1427,8 @@ public void testInstructionIsUnregisteredFromBeamFnDataClientOnSuccess() throws return null; }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); handler.processBundle( BeamFnApi.InstructionRequest.newBuilder() .setInstructionId("instructionId") @@ -1490,7 +1500,8 @@ public void testDataProcessingExceptionsArePropagated() 
throws Exception { return null; }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); assertThrows( "TestException", IllegalStateException.class, @@ -1539,7 +1550,8 @@ public void testPTransformFinishExceptionsArePropagated() throws Exception { return null; }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); assertThrows( "TestException", IllegalStateException.class, @@ -1634,7 +1646,8 @@ private void doStateCalls(BeamFnStateClient beamFnStateClient) { } }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); handler.processBundle( BeamFnApi.InstructionRequest.newBuilder() .setProcessBundle( @@ -1684,7 +1697,8 @@ private void doStateCalls(BeamFnStateClient beamFnStateClient) { } }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); assertThrows( "State API calls are unsupported", IllegalStateException.class, @@ -1786,7 +1800,8 @@ public void reset() { executionStateSampler, ImmutableMap.of(DATA_INPUT_URN, startFinishGuard), Caches.noop(), - bundleProcessorCache); + bundleProcessorCache, + null /* dataSampler */); AtomicBoolean progressShouldExit = new AtomicBoolean(); Future bundleProcessorTask = @@ -1914,7 +1929,8 @@ public Object createRunnerForPTransform(Context context) throws IOException { } }), Caches.noop(), - new BundleProcessorCache()); + new BundleProcessorCache(), + null /* dataSampler */); assertThrows( "Timers are unsupported", IllegalStateException.class, diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java index 35bd5697adc0..c24f016b5cc1 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java @@ -21,6 +21,8 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; @@ -39,6 +41,8 @@ import org.apache.beam.fn.harness.control.BundleProgressReporter; import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionStateTracker; +import org.apache.beam.fn.harness.debug.DataSampler; +import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor; import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo; import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection; @@ -56,6 +60,7 @@ import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.metrics.MetricsEnvironment; import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.util.ByteStringOutputStream; import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.sdk.util.common.ElementByteSizeObservableIterable; import org.apache.beam.sdk.util.common.ElementByteSizeObservableIterator; @@ -507,6 +512,61 @@ public void testLazyByteSizeEstimation() throws Exception { assertThat(result, 
containsInAnyOrder(expected.toArray())); } + /** + * Test that element samples are taken when a DataSampler is present. + * + * @throws Exception + */ + @Test + public void dataSampling() throws Exception { + final String pTransformIdA = "pTransformIdA"; + + ShortIdMap shortIds = new ShortIdMap(); + BundleProgressReporter.InMemory reporterAndRegistrar = new BundleProgressReporter.InMemory(); + DataSampler dataSampler = new DataSampler(); + PCollectionConsumerRegistry consumers = + new PCollectionConsumerRegistry( + sampler.create(), shortIds, reporterAndRegistrar, TEST_DESCRIPTOR, dataSampler); + FnDataReceiver> consumerA1 = mock(FnDataReceiver.class); + + consumers.register(P_COLLECTION_A, pTransformIdA, pTransformIdA + "Name", consumerA1); + + FnDataReceiver> wrapperConsumer = + (FnDataReceiver>) + (FnDataReceiver) consumers.getMultiplexingConsumer(P_COLLECTION_A); + String elementValue = "elem"; + WindowedValue element = valueInGlobalWindow(elementValue); + int numElements = 10; + for (int i = 0; i < numElements; i++) { + wrapperConsumer.accept(element); + } + + BeamFnApi.InstructionRequest request = + BeamFnApi.InstructionRequest.newBuilder() + .setSampleData(BeamFnApi.SampleDataRequest.newBuilder()) + .build(); + BeamFnApi.InstructionResponse response = dataSampler.handleDataSampleRequest(request).build(); + + Map elementSamplesMap = + response.getSampleData().getElementSamplesMap(); + + assertFalse(elementSamplesMap.isEmpty()); + + BeamFnApi.SampleDataResponse.ElementList elementList = elementSamplesMap.get(P_COLLECTION_A); + assertNotNull(elementList); + + List expectedSamples = new ArrayList<>(); + StringUtf8Coder coder = StringUtf8Coder.of(); + for (int i = 0; i < numElements; i++) { + ByteStringOutputStream stream = new ByteStringOutputStream(); + coder.encode(elementValue, stream); + expectedSamples.add( + BeamFnApi.SampledElement.newBuilder().setElement(stream.toByteStringAndReset()).build()); + } + + assertTrue(elementList.getElementsList().containsAll(expectedSamples)); + } + private static class TestElementByteSizeObservableIterable extends ElementByteSizeObservableIterable> { private List elements; diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java new file mode 100644 index 000000000000..4b874dd7e980 --- /dev/null +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.fn.harness.debug; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.sdk.coders.ByteArrayCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.vendor.grpc.v1p48p1.com.google.protobuf.ByteString; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class DataSamplerTest { + byte[] encodeInt(Integer i) throws IOException { + VarIntCoder coder = VarIntCoder.of(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + coder.encode(i, stream, Coder.Context.NESTED); + return stream.toByteArray(); + } + + byte[] encodeString(String s) throws IOException { + StringUtf8Coder coder = StringUtf8Coder.of(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + coder.encode(s, stream, Coder.Context.NESTED); + return stream.toByteArray(); + } + + byte[] encodeByteArray(byte[] b) throws IOException { + ByteArrayCoder coder = ByteArrayCoder.of(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + coder.encode(b, stream, Coder.Context.NESTED); + return stream.toByteArray(); + } + + BeamFnApi.InstructionResponse getAllSamples(DataSampler dataSampler) { + BeamFnApi.InstructionRequest request = + BeamFnApi.InstructionRequest.newBuilder() + .setSampleData(BeamFnApi.SampleDataRequest.newBuilder().build()) + .build(); + return dataSampler.handleDataSampleRequest(request).build(); + } + + BeamFnApi.InstructionResponse getSamplesForPCollection( + DataSampler dataSampler, String pcollection) { + BeamFnApi.InstructionRequest request = + BeamFnApi.InstructionRequest.newBuilder() + .setSampleData( + BeamFnApi.SampleDataRequest.newBuilder().addPcollectionIds(pcollection).build()) + .build(); + return dataSampler.handleDataSampleRequest(request).build(); + } + + BeamFnApi.InstructionResponse getSamplesForPCollections( + DataSampler dataSampler, Iterable pcollections) { + BeamFnApi.InstructionRequest request = + BeamFnApi.InstructionRequest.newBuilder() + .setSampleData( + BeamFnApi.SampleDataRequest.newBuilder().addAllPcollectionIds(pcollections).build()) + .build(); + return dataSampler.handleDataSampleRequest(request).build(); + } + + void assertHasSamples( + BeamFnApi.InstructionResponse response, String pcollection, Iterable elements) { + Map elementSamplesMap = + response.getSampleData().getElementSamplesMap(); + + assertFalse(elementSamplesMap.isEmpty()); + + BeamFnApi.SampleDataResponse.ElementList elementList = elementSamplesMap.get(pcollection); + assertNotNull(elementList); + + List expectedSamples = new ArrayList<>(); + for (byte[] el : elements) { + expectedSamples.add( + BeamFnApi.SampledElement.newBuilder().setElement(ByteString.copyFrom(el)).build()); + } + + assertTrue(elementList.getElementsList().containsAll(expectedSamples)); + } + + /** + * 
Smoke test that a samples show in the output map. + * + * @throws Exception + */ + @Test + public void testSingleOutput() throws Exception { + DataSampler sampler = new DataSampler(); + + VarIntCoder coder = VarIntCoder.of(); + sampler.sampleOutput("pcollection-id", coder).sample(1); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + assertHasSamples(samples, "pcollection-id", Collections.singleton(encodeInt(1))); + } + + /** + * Smoke test that a sample shows in the output map. + * + * @throws Exception + */ + @Test + public void testNestedContext() throws Exception { + DataSampler sampler = new DataSampler(); + + String rawString = "hello"; + byte[] byteArray = rawString.getBytes(StandardCharsets.US_ASCII); + ByteArrayCoder coder = ByteArrayCoder.of(); + sampler.sampleOutput("pcollection-id", coder).sample(byteArray); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + assertHasSamples(samples, "pcollection-id", Collections.singleton(encodeByteArray(byteArray))); + } + + /** + * Test that sampling multiple PCollections under the same descriptor is OK. + * + * @throws Exception + */ + @Test + public void testMultipleOutputs() throws Exception { + DataSampler sampler = new DataSampler(); + + VarIntCoder coder = VarIntCoder.of(); + sampler.sampleOutput("pcollection-id-1", coder).sample(1); + sampler.sampleOutput("pcollection-id-2", coder).sample(2); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + assertHasSamples(samples, "pcollection-id-1", Collections.singleton(encodeInt(1))); + assertHasSamples(samples, "pcollection-id-2", Collections.singleton(encodeInt(2))); + } + + /** + * Test that the response contains samples from the same PCollection across descriptors. + * + * @throws Exception + */ + @Test + public void testMultipleSamePCollections() throws Exception { + DataSampler sampler = new DataSampler(); + + VarIntCoder coder = VarIntCoder.of(); + sampler.sampleOutput("pcollection-id", coder).sample(1); + sampler.sampleOutput("pcollection-id", coder).sample(2); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + assertHasSamples(samples, "pcollection-id", ImmutableList.of(encodeInt(1), encodeInt(2))); + } + + void generateStringSamples(DataSampler sampler) { + StringUtf8Coder coder = StringUtf8Coder.of(); + sampler.sampleOutput("a", coder).sample("a1"); + sampler.sampleOutput("a", coder).sample("a2"); + sampler.sampleOutput("b", coder).sample("b1"); + sampler.sampleOutput("b", coder).sample("b2"); + sampler.sampleOutput("c", coder).sample("c1"); + sampler.sampleOutput("c", coder).sample("c2"); + } + + /** + * Test that samples can be filtered based on PCollection id. + * + * @throws Exception + */ + @Test + public void testFiltersSinglePCollectionId() throws Exception { + DataSampler sampler = new DataSampler(10, 10); + generateStringSamples(sampler); + + BeamFnApi.InstructionResponse samples = getSamplesForPCollection(sampler, "a"); + assertHasSamples(samples, "a", ImmutableList.of(encodeString("a1"), encodeString("a2"))); + } + + /** + * Test that samples can be filtered both on PCollection and ProcessBundleDescriptor id. 
+ * + * @throws Exception + */ + @Test + public void testFiltersMultiplePCollectionIds() throws Exception { + List pcollectionIds = ImmutableList.of("a", "c"); + + DataSampler sampler = new DataSampler(10, 10); + generateStringSamples(sampler); + + BeamFnApi.InstructionResponse samples = getSamplesForPCollections(sampler, pcollectionIds); + assertThat(samples.getSampleData().getElementSamplesMap().size(), equalTo(2)); + assertHasSamples(samples, "a", ImmutableList.of(encodeString("a1"), encodeString("a2"))); + assertHasSamples(samples, "c", ImmutableList.of(encodeString("c1"), encodeString("c2"))); + } + + /** + * Test that samples can be taken from the DataSampler while adding new OutputSamplers. This fails + * with a ConcurrentModificationException if there is a bug. + * + * @throws Exception + */ + @Test + public void testConcurrentNewSampler() throws Exception { + DataSampler sampler = new DataSampler(); + VarIntCoder coder = VarIntCoder.of(); + + // Make threads that will create 100 individual OutputSamplers each. + Thread[] sampleThreads = new Thread[100]; + CountDownLatch startSignal = new CountDownLatch(1); + CountDownLatch doneSignal = new CountDownLatch(sampleThreads.length); + + for (int i = 0; i < sampleThreads.length; i++) { + sampleThreads[i] = + new Thread( + () -> { + try { + startSignal.await(); + } catch (InterruptedException e) { + return; + } + + for (int j = 0; j < 100; j++) { + sampler.sampleOutput("pcollection-" + j, coder).sample(0); + } + + doneSignal.countDown(); + }); + sampleThreads[i].start(); + } + + startSignal.countDown(); + while (doneSignal.getCount() > 0) { + sampler.handleDataSampleRequest( + BeamFnApi.InstructionRequest.newBuilder() + .setSampleData(BeamFnApi.SampleDataRequest.newBuilder()) + .build()); + } + + for (Thread sampleThread : sampleThreads) { + sampleThread.join(); + } + } +} diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java new file mode 100644 index 000000000000..953ccce9e235 --- /dev/null +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.fn.harness.debug; + +import static junit.framework.TestCase.assertEquals; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.vendor.grpc.v1p48p1.com.google.protobuf.ByteString; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class OutputSamplerTest { + public BeamFnApi.SampledElement encodeInt(Integer i) throws IOException { + VarIntCoder coder = VarIntCoder.of(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + coder.encode(i, stream); + return BeamFnApi.SampledElement.newBuilder() + .setElement(ByteString.copyFrom(stream.toByteArray())) + .build(); + } + + /** + * Test that the first N are always sampled. + * + * @throws Exception when encoding fails (shouldn't happen). + */ + @Test + public void testSamplesFirstN() throws Exception { + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + + // Purposely go over maxSamples and sampleEveryN. This helps to increase confidence. + for (int i = 0; i < 15; ++i) { + outputSampler.sample(i); + } + + // The expected list is only 0..9 inclusive. + List expected = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + expected.add(encodeInt(i)); + } + + List samples = outputSampler.samples(); + assertThat(samples, containsInAnyOrder(expected.toArray())); + } + + /** + * Test that the previous values are overwritten and only the most recent `maxSamples` are kept. + * + * @throws Exception when encoding fails (shouldn't happen). + */ + @Test + public void testActsLikeCircularBuffer() throws Exception { + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20); + + for (int i = 0; i < 100; ++i) { + outputSampler.sample(i); + } + + // The first 10 are always sampled, but with maxSamples = 5, the first ten are downsampled to + // 4..9 inclusive. Then, + // the 20th element is sampled (19) and every 20 after. + List expected = new ArrayList<>(); + expected.add(encodeInt(19)); + expected.add(encodeInt(39)); + expected.add(encodeInt(59)); + expected.add(encodeInt(79)); + expected.add(encodeInt(99)); + + List samples = outputSampler.samples(); + assertThat(samples, containsInAnyOrder(expected.toArray())); + } + + /** + * Test that sampling a PCollection while retrieving samples from multiple threads is ok. + * + * @throws Exception + */ + @Test + public void testConcurrentSamples() throws Exception { + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 2); + + CountDownLatch startSignal = new CountDownLatch(1); + CountDownLatch doneSignal = new CountDownLatch(2); + + // Iteration count was empirically chosen to have a high probability of failure without the + // test going for too long. + // Generates a range of numbers from 0 to 1000000. 
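    // Thread A samples only non-negative values and thread B only negative ones, so each decoded
    // sample can later be attributed to the thread that produced it and checked for per-thread
    // ordering.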
+ Thread sampleThreadA = + new Thread( + () -> { + try { + startSignal.await(); + } catch (InterruptedException e) { + return; + } + + for (int i = 0; i < 1000000; i++) { + outputSampler.sample(i); + } + + doneSignal.countDown(); + }); + + // Generates a range of numbers from -1000000 to 0. + Thread sampleThreadB = + new Thread( + () -> { + try { + startSignal.await(); + } catch (InterruptedException e) { + return; + } + + for (int i = -1000000; i < 0; i++) { + outputSampler.sample(i); + } + + doneSignal.countDown(); + }); + + // Ready the threads. + sampleThreadA.start(); + sampleThreadB.start(); + + // Start the threads at the same time. + startSignal.countDown(); + + // Generate contention by sampling at the same time as the samples are generated. + List samples = new ArrayList<>(); + while (doneSignal.getCount() > 0) { + samples.addAll(outputSampler.samples()); + } + + // Stop the threads and sort the samples from which thread it came from. + sampleThreadA.join(); + sampleThreadB.join(); + List samplesFromThreadA = new ArrayList<>(); + List samplesFromThreadB = new ArrayList<>(); + for (BeamFnApi.SampledElement sampledElement : samples) { + int el = coder.decode(sampledElement.getElement().newInput()); + if (el >= 0) { + samplesFromThreadA.add(el); + } else { + samplesFromThreadB.add(el); + } + } + + // Copy the array and sort it. + List sortedSamplesFromThreadA = new ArrayList<>(samplesFromThreadA); + List sortedSamplesFromThreadB = new ArrayList<>(samplesFromThreadB); + Collections.sort(sortedSamplesFromThreadA); + Collections.sort(sortedSamplesFromThreadB); + + // Order is preserved when getting the samples. If there is a weird race condition, these + // numbers may be out of order. + assertEquals(samplesFromThreadA, sortedSamplesFromThreadA); + assertEquals(samplesFromThreadB, sortedSamplesFromThreadB); + } +} diff --git a/sdks/java/io/amazon-web-services/build.gradle b/sdks/java/io/amazon-web-services/build.gradle index 23fd55225589..9cc453572e1a 100644 --- a/sdks/java/io/amazon-web-services/build.gradle +++ b/sdks/java/io/amazon-web-services/build.gradle @@ -51,6 +51,7 @@ dependencies { runtimeOnly library.java.commons_codec runtimeOnly "org.apache.httpcomponents:httpclient:4.5.12" testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation "io.findify:s3mock_2.12:0.2.6" testImplementation library.java.hamcrest diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java index f0813ef4eca0..92f31977d8b2 100644 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java +++ b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java @@ -50,8 +50,8 @@ import java.util.stream.IntStream; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO.RetryConfiguration; import org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO.Write.WriteFn; import 
org.apache.beam.sdk.testing.ExpectedLogs; diff --git a/sdks/java/io/amazon-web-services2/build.gradle b/sdks/java/io/amazon-web-services2/build.gradle index 5b25cde8f0e0..95d0649d8306 100644 --- a/sdks/java/io/amazon-web-services2/build.gradle +++ b/sdks/java/io/amazon-web-services2/build.gradle @@ -66,6 +66,7 @@ dependencies { implementation library.java.commons_lang3 implementation library.java.commons_codec testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation "io.findify:s3mock_2.12:0.2.6" testImplementation 'org.elasticmq:elasticmq-rest-sqs_2.12:1.3.5' diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/dynamodb/DynamoDBIOWriteTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/dynamodb/DynamoDBIOWriteTest.java index 64cb3f44692f..b410ec916d6d 100644 --- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/dynamodb/DynamoDBIOWriteTest.java +++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/dynamodb/DynamoDBIOWriteTest.java @@ -41,8 +41,8 @@ import java.util.function.Supplier; import java.util.stream.IntStream; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.aws2.MockClientBuilderFactory; import org.apache.beam.sdk.io.aws2.common.ClientConfiguration; import org.apache.beam.sdk.io.aws2.common.RetryConfiguration; diff --git a/sdks/java/io/cdap/build.gradle b/sdks/java/io/cdap/build.gradle index 0a1e12a87e14..a9122d1c8f6d 100644 --- a/sdks/java/io/cdap/build.gradle +++ b/sdks/java/io/cdap/build.gradle @@ -69,6 +69,7 @@ dependencies { testImplementation library.java.junit testImplementation library.java.mockito_core testImplementation library.java.testcontainers_postgresql + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:hadoop-common", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:hadoop-format", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration") diff --git a/sdks/java/io/cdap/src/test/java/org/apache/beam/sdk/io/cdap/TestRowDBWritable.java b/sdks/java/io/cdap/src/test/java/org/apache/beam/sdk/io/cdap/TestRowDBWritable.java index d85c5ea3e69e..ef17d1601dd7 100644 --- a/sdks/java/io/cdap/src/test/java/org/apache/beam/sdk/io/cdap/TestRowDBWritable.java +++ b/sdks/java/io/cdap/src/test/java/org/apache/beam/sdk/io/cdap/TestRowDBWritable.java @@ -23,8 +23,8 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.common.TestRow; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.lib.db.DBWritable; diff --git a/sdks/java/io/file-based-io-tests/build.gradle b/sdks/java/io/file-based-io-tests/build.gradle index 3a23d6ceeabc..14abdbfbae26 100644 --- 
a/sdks/java/io/file-based-io-tests/build.gradle +++ b/sdks/java/io/file-based-io-tests/build.gradle @@ -26,6 +26,7 @@ ext.summary = "Integration tests for reading/writing using file-based sources/si dependencies { testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:xml", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:parquet", configuration: "testRuntimeMigration") diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java index 832c69af1c44..684cf4d3b639 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java @@ -31,8 +31,8 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.io.AvroIO; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOIT.java index 7db703b7b02b..0526b5d66d7b 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOIT.java @@ -31,7 +31,7 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; diff --git a/sdks/java/io/file-schema-transform/build.gradle b/sdks/java/io/file-schema-transform/build.gradle index f018e250cc28..19b10eacf7b1 100644 --- a/sdks/java/io/file-schema-transform/build.gradle +++ b/sdks/java/io/file-schema-transform/build.gradle @@ -41,6 +41,7 @@ dependencies { implementation library.java.joda_time implementation library.java.vendored_guava_26_0_jre implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":sdks:java:extensions:avro") implementation project(path: ":sdks:java:io:csv") implementation project(path: ":sdks:java:io:parquet") implementation "org.apache.parquet:parquet-common:$parquet_version" diff --git a/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/AvroWriteSchemaTransformFormatProvider.java b/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/AvroWriteSchemaTransformFormatProvider.java index 75cf841beabe..ccf61fddb872 100644 --- 
a/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/AvroWriteSchemaTransformFormatProvider.java +++ b/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/AvroWriteSchemaTransformFormatProvider.java @@ -22,10 +22,10 @@ import com.google.auto.service.AutoService; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.coders.AvroGenericCoder; -import org.apache.beam.sdk.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.coders.AvroGenericCoder; +import org.apache.beam.sdk.extensions.avro.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.Values; import org.apache.beam.sdk.values.PCollection; diff --git a/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/FileWriteSchemaTransformFormatProviders.java b/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/FileWriteSchemaTransformFormatProviders.java index 1148f2ee116e..f1b00db896d7 100644 --- a/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/FileWriteSchemaTransformFormatProviders.java +++ b/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/FileWriteSchemaTransformFormatProviders.java @@ -23,12 +23,12 @@ import java.util.Optional; import org.apache.avro.generic.GenericRecord; import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.io.Providers; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.values.Row; import org.apache.beam.sdk.values.TypeDescriptor; diff --git a/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/ParquetWriteSchemaTransformFormatProvider.java b/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/ParquetWriteSchemaTransformFormatProvider.java index 6981844d229f..aad54e698bb4 100644 --- a/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/ParquetWriteSchemaTransformFormatProvider.java +++ b/sdks/java/io/file-schema-transform/src/main/java/org/apache/beam/sdk/io/fileschematransform/ParquetWriteSchemaTransformFormatProvider.java @@ -24,11 +24,11 @@ import java.util.Optional; import org.apache.avro.generic.GenericRecord; import org.apache.beam.sdk.coders.AvroGenericCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.fileschematransform.FileWriteSchemaTransformConfiguration.ParquetConfiguration; import org.apache.beam.sdk.io.parquet.ParquetIO; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.Values; import org.apache.beam.sdk.values.PCollection; diff --git 
a/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/AvroFileWriteSchemaTransformFormatProviderTest.java b/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/AvroFileWriteSchemaTransformFormatProviderTest.java index 466ea475d310..b304d81a0fdc 100644 --- a/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/AvroFileWriteSchemaTransformFormatProviderTest.java +++ b/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/AvroFileWriteSchemaTransformFormatProviderTest.java @@ -23,10 +23,10 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.coders.AvroGenericCoder; -import org.apache.beam.sdk.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.coders.AvroGenericCoder; +import org.apache.beam.sdk.extensions.avro.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.Row; diff --git a/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/ParquetFileWriteSchemaTransformFormatProviderTest.java b/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/ParquetFileWriteSchemaTransformFormatProviderTest.java index ea51f235c7f0..1bd094cfdad4 100644 --- a/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/ParquetFileWriteSchemaTransformFormatProviderTest.java +++ b/sdks/java/io/file-schema-transform/src/test/java/org/apache/beam/sdk/io/fileschematransform/ParquetFileWriteSchemaTransformFormatProviderTest.java @@ -24,9 +24,9 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.parquet.ParquetIO; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.Row; diff --git a/sdks/java/io/google-cloud-platform/build.gradle b/sdks/java/io/google-cloud-platform/build.gradle index d4a143c61730..076c90c56464 100644 --- a/sdks/java/io/google-cloud-platform/build.gradle +++ b/sdks/java/io/google-cloud-platform/build.gradle @@ -37,6 +37,7 @@ dependencies { implementation project(":runners:core-java") implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(":sdks:java:expansion-service") + implementation project(":sdks:java:extensions:avro") permitUnusedDeclared project(":sdks:java:expansion-service") // BEAM-11761 implementation project(":sdks:java:extensions:google-cloud-platform-core") implementation project(":sdks:java:extensions:protobuf") @@ -155,6 +156,7 @@ dependencies { testImplementation library.java.arrow_memory_netty testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:extensions:google-cloud-platform-core", configuration: "testRuntimeMigration") testImplementation project(path: 
":sdks:java:extensions:protobuf", configuration: "testRuntimeMigration") testImplementation project(path: ":runners:direct-java", configuration: "shadow") diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java index 7becffa6d17f..783ced7d6ffd 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java @@ -39,7 +39,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.schemas.utils.AvroUtils.TypeWithNullability; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils.TypeWithNullability; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Functions; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java index 4f9f0b709262..6745f7aceea0 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java @@ -68,14 +68,17 @@ import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderRegistry; import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.ListCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; import org.apache.beam.sdk.extensions.gcp.util.Transport; import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; -import org.apache.beam.sdk.io.AvroSource; import org.apache.beam.sdk.io.BoundedSource; +import org.apache.beam.sdk.io.BoundedSource.BoundedReader; import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.io.fs.MoveOptions; import org.apache.beam.sdk.io.fs.ResolveOptions; import org.apache.beam.sdk.io.fs.ResourceId; @@ -1179,11 +1182,11 @@ public PCollection expand(PBegin input) { // if both toRowFn and fromRowFn values are set, enable Beam schema support Pipeline p = input.getPipeline(); + BigQueryOptions bqOptions = p.getOptions().as(BigQueryOptions.class); final BigQuerySourceDef sourceDef = createSourceDef(); Schema beamSchema = null; if (getTypeDescriptor() != null && getToBeamRowFn() != null && getFromBeamRowFn() != null) { - BigQueryOptions bqOptions = p.getOptions().as(BigQueryOptions.class); beamSchema = sourceDef.getBeamSchema(bqOptions); beamSchema = getFinalSchema(beamSchema, getSelectedFields()); } @@ -1191,7 +1194,7 @@ public PCollection expand(PBegin input) { final Coder coder = inferCoder(p.getCoderRegistry()); if (getMethod() == TypedRead.Method.DIRECT_READ) { - return expandForDirectRead(input, coder, beamSchema); + return expandForDirectRead(input, coder, 
beamSchema, bqOptions); } checkArgument( @@ -1369,7 +1372,7 @@ private static Schema getFinalSchema( } private PCollection expandForDirectRead( - PBegin input, Coder outputCoder, Schema beamSchema) { + PBegin input, Coder outputCoder, Schema beamSchema, BigQueryOptions bqOptions) { ValueProvider tableProvider = getTableProvider(); Pipeline p = input.getPipeline(); if (tableProvider != null) { @@ -1416,6 +1419,7 @@ private PCollection expandForDirectRead( // PCollectionView jobIdTokenView; + PCollectionTuple tuple; PCollection rows; if (!getWithTemplateCompatibility()) { @@ -1446,108 +1450,46 @@ public String apply(String input) { jobIdTokenView = jobIdTokenCollection.apply("ViewId", View.asSingleton()); TupleTag readStreamsTag = new TupleTag<>(); + TupleTag> listReadStreamsTag = new TupleTag<>(); TupleTag readSessionTag = new TupleTag<>(); TupleTag tableSchemaTag = new TupleTag<>(); - PCollectionTuple tuple = - jobIdTokenCollection.apply( - "RunQueryJob", - ParDo.of( - new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - BigQueryOptions options = - c.getPipelineOptions().as(BigQueryOptions.class); - String jobUuid = c.element(); - // Execute the query and get the destination table holding the results. - // The getTargetTable call runs a new instance of the query and returns - // the destination table created to hold the results. - BigQueryStorageQuerySource querySource = - createStorageQuerySource(jobUuid, outputCoder); - Table queryResultTable = querySource.getTargetTable(options); - - // Create a read session without specifying a desired stream count and - // let the BigQuery storage server pick the number of streams. - CreateReadSessionRequest request = - CreateReadSessionRequest.newBuilder() - .setParent( - BigQueryHelpers.toProjectResourceName( - options.getBigQueryProject() == null - ? 
options.getProject() - : options.getBigQueryProject())) - .setReadSession( - ReadSession.newBuilder() - .setTable( - BigQueryHelpers.toTableResourceName( - queryResultTable.getTableReference())) - .setDataFormat(DataFormat.AVRO)) - .setMaxStreamCount(0) - .build(); - - ReadSession readSession; - try (StorageClient storageClient = - getBigQueryServices().getStorageClient(options)) { - readSession = storageClient.createReadSession(request); - } - - for (ReadStream readStream : readSession.getStreamsList()) { - c.output(readStream); - } - - c.output(readSessionTag, readSession); - c.output( - tableSchemaTag, - BigQueryHelpers.toJsonString(queryResultTable.getSchema())); - } - }) - .withOutputTags( - readStreamsTag, TupleTagList.of(readSessionTag).and(tableSchemaTag))); + if (!bqOptions.getEnableBundling()) { + tuple = + createTupleForDirectRead( + jobIdTokenCollection, + outputCoder, + readStreamsTag, + readSessionTag, + tableSchemaTag); + tuple.get(readStreamsTag).setCoder(ProtoCoder.of(ReadStream.class)); + } else { + tuple = + createTupleForDirectReadWithStreamBundle( + jobIdTokenCollection, + outputCoder, + listReadStreamsTag, + readSessionTag, + tableSchemaTag); + tuple.get(listReadStreamsTag).setCoder(ListCoder.of(ProtoCoder.of(ReadStream.class))); + } - tuple.get(readStreamsTag).setCoder(ProtoCoder.of(ReadStream.class)); tuple.get(readSessionTag).setCoder(ProtoCoder.of(ReadSession.class)); tuple.get(tableSchemaTag).setCoder(StringUtf8Coder.of()); - PCollectionView readSessionView = tuple.get(readSessionTag).apply("ReadSessionView", View.asSingleton()); PCollectionView tableSchemaView = tuple.get(tableSchemaTag).apply("TableSchemaView", View.asSingleton()); - rows = - tuple - .get(readStreamsTag) - .apply(Reshuffle.viaRandomKey()) - .apply( - ParDo.of( - new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - ReadSession readSession = c.sideInput(readSessionView); - TableSchema tableSchema = - BigQueryHelpers.fromJsonString( - c.sideInput(tableSchemaView), TableSchema.class); - ReadStream readStream = c.element(); - - BigQueryStorageStreamSource streamSource = - BigQueryStorageStreamSource.create( - readSession, - readStream, - tableSchema, - getParseFn(), - outputCoder, - getBigQueryServices()); - - // Read all of the data from the stream. In the event that this work - // item fails and is rescheduled, the same rows will be returned in - // the same order. 
- BoundedSource.BoundedReader reader = - streamSource.createReader(c.getPipelineOptions()); - for (boolean more = reader.start(); more; more = reader.advance()) { - c.output(reader.getCurrent()); - } - } - }) - .withSideInputs(readSessionView, tableSchemaView)) - .setCoder(outputCoder); + if (!bqOptions.getEnableBundling()) { + rows = + createPCollectionForDirectRead( + tuple, outputCoder, readStreamsTag, readSessionView, tableSchemaView); + } else { + rows = + createPCollectionForDirectReadWithStreamBundle( + tuple, outputCoder, listReadStreamsTag, readSessionView, tableSchemaView); + } } PassThroughThenCleanup.CleanupOperation cleanupOperation = @@ -1593,6 +1535,235 @@ void cleanup(ContextContainer c) throws Exception { return rows.apply(new PassThroughThenCleanup<>(cleanupOperation, jobIdTokenView)); } + private PCollectionTuple createTupleForDirectRead( + PCollection jobIdTokenCollection, + Coder outputCoder, + TupleTag readStreamsTag, + TupleTag readSessionTag, + TupleTag tableSchemaTag) { + PCollectionTuple tuple = + jobIdTokenCollection.apply( + "RunQueryJob", + ParDo.of( + new DoFn() { + @ProcessElement + public void processElement(ProcessContext c) throws Exception { + BigQueryOptions options = + c.getPipelineOptions().as(BigQueryOptions.class); + String jobUuid = c.element(); + // Execute the query and get the destination table holding the results. + // The getTargetTable call runs a new instance of the query and returns + // the destination table created to hold the results. + BigQueryStorageQuerySource querySource = + createStorageQuerySource(jobUuid, outputCoder); + Table queryResultTable = querySource.getTargetTable(options); + + // Create a read session without specifying a desired stream count and + // let the BigQuery storage server pick the number of streams. + CreateReadSessionRequest request = + CreateReadSessionRequest.newBuilder() + .setParent( + BigQueryHelpers.toProjectResourceName( + options.getBigQueryProject() == null + ? options.getProject() + : options.getBigQueryProject())) + .setReadSession( + ReadSession.newBuilder() + .setTable( + BigQueryHelpers.toTableResourceName( + queryResultTable.getTableReference())) + .setDataFormat(DataFormat.AVRO)) + .setMaxStreamCount(0) + .build(); + + ReadSession readSession; + try (StorageClient storageClient = + getBigQueryServices().getStorageClient(options)) { + readSession = storageClient.createReadSession(request); + } + + for (ReadStream readStream : readSession.getStreamsList()) { + c.output(readStream); + } + + c.output(readSessionTag, readSession); + c.output( + tableSchemaTag, + BigQueryHelpers.toJsonString(queryResultTable.getSchema())); + } + }) + .withOutputTags( + readStreamsTag, TupleTagList.of(readSessionTag).and(tableSchemaTag))); + + return tuple; + } + + private PCollectionTuple createTupleForDirectReadWithStreamBundle( + PCollection jobIdTokenCollection, + Coder outputCoder, + TupleTag> listReadStreamsTag, + TupleTag readSessionTag, + TupleTag tableSchemaTag) { + + PCollectionTuple tuple = + jobIdTokenCollection.apply( + "RunQueryJob", + ParDo.of( + new DoFn>() { + @ProcessElement + public void processElement(ProcessContext c) throws Exception { + BigQueryOptions options = + c.getPipelineOptions().as(BigQueryOptions.class); + String jobUuid = c.element(); + // Execute the query and get the destination table holding the results. + // The getTargetTable call runs a new instance of the query and returns + // the destination table created to hold the results. 
+ BigQueryStorageQuerySource querySource = + createStorageQuerySource(jobUuid, outputCoder); + Table queryResultTable = querySource.getTargetTable(options); + + // Create a read session without specifying a desired stream count and + // let the BigQuery storage server pick the number of streams. + CreateReadSessionRequest request = + CreateReadSessionRequest.newBuilder() + .setParent( + BigQueryHelpers.toProjectResourceName( + options.getBigQueryProject() == null + ? options.getProject() + : options.getBigQueryProject())) + .setReadSession( + ReadSession.newBuilder() + .setTable( + BigQueryHelpers.toTableResourceName( + queryResultTable.getTableReference())) + .setDataFormat(DataFormat.AVRO)) + .setMaxStreamCount(0) + .build(); + + ReadSession readSession; + try (StorageClient storageClient = + getBigQueryServices().getStorageClient(options)) { + readSession = storageClient.createReadSession(request); + } + int streamIndex = 0; + int streamsPerBundle = 10; + List streamBundle = Lists.newArrayList(); + for (ReadStream readStream : readSession.getStreamsList()) { + streamIndex++; + streamBundle.add(readStream); + if (streamIndex % streamsPerBundle == 0) { + c.output(streamBundle); + streamBundle = Lists.newArrayList(); + } + } + if (streamIndex % streamsPerBundle != 0) { + c.output(streamBundle); + } + c.output(readSessionTag, readSession); + c.output( + tableSchemaTag, + BigQueryHelpers.toJsonString(queryResultTable.getSchema())); + } + }) + .withOutputTags( + listReadStreamsTag, TupleTagList.of(readSessionTag).and(tableSchemaTag))); + + return tuple; + } + + private PCollection createPCollectionForDirectRead( + PCollectionTuple tuple, + Coder outputCoder, + TupleTag readStreamsTag, + PCollectionView readSessionView, + PCollectionView tableSchemaView) { + PCollection rows = + tuple + .get(readStreamsTag) + .apply(Reshuffle.viaRandomKey()) + .apply( + ParDo.of( + new DoFn() { + @ProcessElement + public void processElement(ProcessContext c) throws Exception { + ReadSession readSession = c.sideInput(readSessionView); + TableSchema tableSchema = + BigQueryHelpers.fromJsonString( + c.sideInput(tableSchemaView), TableSchema.class); + ReadStream readStream = c.element(); + + BigQueryStorageStreamSource streamSource = + BigQueryStorageStreamSource.create( + readSession, + readStream, + tableSchema, + getParseFn(), + outputCoder, + getBigQueryServices()); + + // Read all of the data from the stream. In the event that this work + // item fails and is rescheduled, the same rows will be returned in + // the same order. 
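[Editorial aside, not part of the patch] The RunQueryJob DoFn in createTupleForDirectReadWithStreamBundle above emits ReadStreams in fixed-size bundles (streamsPerBundle is hard-coded to 10) and flushes a trailing partial bundle. A minimal standalone sketch of that chunking loop, using a hypothetical ChunkingSketch class with integers standing in for ReadStream:

import java.util.ArrayList;
import java.util.List;

/** Fixed-size chunking, as used to group ReadStreams into StreamBundles of 10. */
public class ChunkingSketch {
  static <T> List<List<T>> chunk(List<T> items, int chunkSize) {
    List<List<T>> bundles = new ArrayList<>();
    List<T> current = new ArrayList<>();
    int index = 0;
    for (T item : items) {
      index++;
      current.add(item);
      if (index % chunkSize == 0) { // emit a full bundle
        bundles.add(current);
        current = new ArrayList<>();
      }
    }
    if (index % chunkSize != 0) { // emit the trailing partial bundle, if any
      bundles.add(current);
    }
    return bundles;
  }

  public static void main(String[] args) {
    List<Integer> streams = new ArrayList<>();
    for (int i = 0; i < 23; i++) {
      streams.add(i);
    }
    System.out.println(chunk(streams, 10).size()); // 3 bundles: 10, 10, and 3 elements
  }
}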
+ BoundedSource.BoundedReader reader = + streamSource.createReader(c.getPipelineOptions()); + for (boolean more = reader.start(); more; more = reader.advance()) { + c.output(reader.getCurrent()); + } + } + }) + .withSideInputs(readSessionView, tableSchemaView)) + .setCoder(outputCoder); + + return rows; + } + + private PCollection createPCollectionForDirectReadWithStreamBundle( + PCollectionTuple tuple, + Coder outputCoder, + TupleTag> listReadStreamsTag, + PCollectionView readSessionView, + PCollectionView tableSchemaView) { + PCollection rows = + tuple + .get(listReadStreamsTag) + .apply(Reshuffle.viaRandomKey()) + .apply( + ParDo.of( + new DoFn, T>() { + @ProcessElement + public void processElement(ProcessContext c) throws Exception { + ReadSession readSession = c.sideInput(readSessionView); + TableSchema tableSchema = + BigQueryHelpers.fromJsonString( + c.sideInput(tableSchemaView), TableSchema.class); + List streamBundle = c.element(); + + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + tableSchema, + getParseFn(), + outputCoder, + getBigQueryServices(), + 1L); + + // Read all of the data from the stream. In the event that this work + // item fails and is rescheduled, the same rows will be returned in + // the same order. + BoundedReader reader = + streamSource.createReader(c.getPipelineOptions()); + for (boolean more = reader.start(); more; more = reader.advance()) { + c.output(reader.getCurrent()); + } + } + }) + .withSideInputs(readSessionView, tableSchemaView)) + .setCoder(outputCoder); + + return rows; + } + @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java index bf09bf4d9e37..938d131a0da5 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java @@ -17,6 +17,8 @@ */ package org.apache.beam.sdk.io.gcp.bigquery; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; import org.apache.beam.sdk.options.ApplicationNameOptions; import org.apache.beam.sdk.options.Default; @@ -163,4 +165,13 @@ public interface BigQueryOptions Long getStorageWriteApiMaxRequestSize(); void setStorageWriteApiMaxRequestSize(Long value); + + @Experimental(Kind.UNSPECIFIED) + @Description( + "If set, BigQueryIO.Read will use the StreamBundle based" + + "implementation of the Read API Source") + @Default.Boolean(false) + Boolean getEnableBundling(); + + void setEnableBundling(Boolean value); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySource.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySource.java index 8e3b437bec50..8a35f56941fd 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySource.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySource.java @@ -21,7 +21,7 @@ import com.google.api.services.bigquery.model.TableSchema; 
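[Editorial aside, not part of the patch] The new enableBundling option declared in BigQueryOptions above gates the StreamBundle-based Read API path. Assuming Beam's usual convention of deriving the flag name from the getter (getEnableBundling -> --enableBundling), a pipeline would opt in roughly as in this hypothetical sketch:

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

/** How a pipeline might opt in to the StreamBundle-based Read API source. */
public class EnableBundlingSketch {
  public static void main(String[] args) {
    BigQueryOptions options =
        PipelineOptionsFactory.fromArgs("--enableBundling=true").as(BigQueryOptions.class);
    System.out.println(options.getEnableBundling()); // true; defaults to false when unset

    // Equivalently, set it programmatically before constructing the pipeline.
    options.setEnableBundling(true);
  }
}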
import java.io.IOException; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.display.DisplayData; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java index 606d1d3ad886..628ec6cf7816 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryQuerySourceDef.java @@ -30,7 +30,7 @@ import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType; import org.apache.beam.sdk.options.ValueProvider; import org.apache.beam.sdk.schemas.Schema; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java index bc798c475734..7b734994bc12 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java @@ -1339,6 +1339,9 @@ public StreamAppendClient getStreamAppendClient( StreamWriter streamWriter = StreamWriter.newBuilder(streamName) + .setExecutorProvider( + FixedExecutorProvider.create( + options.as(ExecutorOptions.class).getScheduledExecutorService())) .setWriterSchema(protoSchema) .setChannelProvider(transportChannelProvider) .setEnableConnectionPool(useConnectionPool) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java index 1f16fb440495..c1db05b200c3 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java @@ -30,7 +30,7 @@ import java.io.IOException; import java.util.List; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.fs.MatchResult; import org.apache.beam.sdk.io.fs.ResourceId; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceDef.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceDef.java index 9532a2f4d6f2..733edd268112 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceDef.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceDef.java @@ -22,7 
+22,7 @@ import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.transforms.SerializableFunction; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageArrowReader.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageArrowReader.java index 8f23825b9867..70703cf0082e 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageArrowReader.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageArrowReader.java @@ -28,7 +28,7 @@ import org.apache.avro.generic.GenericRecord; import org.apache.beam.sdk.extensions.arrow.ArrowConversion; import org.apache.beam.sdk.extensions.arrow.ArrowConversion.RecordBatchRowIterator; -import org.apache.beam.sdk.schemas.utils.AvroUtils; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.values.Row; class BigQueryStorageArrowReader implements BigQueryStorageReader { diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java index 3c8335ac82b5..834409062ccd 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageSourceBase.java @@ -30,11 +30,11 @@ import org.apache.avro.Schema; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.extensions.arrow.ArrowConversion; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.ValueProvider; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; @@ -100,7 +100,7 @@ public Coder getOutputCoder() { } @Override - public List> split( + public List> split( long desiredBundleSizeBytes, PipelineOptions options) throws Exception { BigQueryOptions bqOptions = options.as(BigQueryOptions.class); @Nullable Table targetTable = getTargetTable(bqOptions); @@ -133,13 +133,18 @@ public List> split( readSessionBuilder.setDataFormat(format); } + // Setting the requested max stream count to 0, implies that the Read API backend will select + // an appropriate number of streams for the Session to produce reasonable throughput. + // This is required when using the Read API Source V2. int streamCount = 0; - if (desiredBundleSizeBytes > 0) { - long tableSizeBytes = (targetTable != null) ? targetTable.getNumBytes() : 0; - streamCount = (int) Math.min(tableSizeBytes / desiredBundleSizeBytes, MAX_SPLIT_COUNT); - } + if (!bqOptions.getEnableBundling()) { + if (desiredBundleSizeBytes > 0) { + long tableSizeBytes = (targetTable != null) ? 
targetTable.getNumBytes() : 0; + streamCount = (int) Math.min(tableSizeBytes / desiredBundleSizeBytes, MAX_SPLIT_COUNT); + } - streamCount = Math.max(streamCount, MIN_SPLIT_COUNT); + streamCount = Math.max(streamCount, MIN_SPLIT_COUNT); + } CreateReadSessionRequest createReadSessionRequest = CreateReadSessionRequest.newBuilder() @@ -166,6 +171,25 @@ public List> split( return ImmutableList.of(); } + streamCount = readSession.getStreamsList().size(); + int streamsPerBundle = 0; + double bytesPerStream = 0; + LOG.info( + "Estimated bytes this ReadSession will scan when all Streams are consumed: '{}'", + readSession.getEstimatedTotalBytesScanned()); + if (bqOptions.getEnableBundling()) { + if (desiredBundleSizeBytes > 0) { + bytesPerStream = + (double) readSession.getEstimatedTotalBytesScanned() / readSession.getStreamsCount(); + LOG.info("Estimated bytes each Stream will consume: '{}'", bytesPerStream); + streamsPerBundle = (int) Math.ceil(desiredBundleSizeBytes / bytesPerStream); + } else { + streamsPerBundle = (int) Math.ceil((double) streamCount / 10); + } + streamsPerBundle = Math.min(streamCount, streamsPerBundle); + LOG.info("Distributing '{}' Streams per StreamBundle.", streamsPerBundle); + } + Schema sessionSchema; if (readSession.getDataFormat() == DataFormat.ARROW) { org.apache.arrow.vector.types.pojo.Schema schema = @@ -180,18 +204,37 @@ public List> split( throw new IllegalArgumentException( "data is not in a supported dataFormat: " + readSession.getDataFormat()); } - + int streamIndex = 0; Preconditions.checkStateNotNull( targetTable); // TODO: this is inconsistent with method above, where it can be null TableSchema trimmedSchema = BigQueryAvroUtils.trimBigQueryTableSchema(targetTable.getSchema(), sessionSchema); - List> sources = Lists.newArrayList(); + if (!bqOptions.getEnableBundling()) { + List> sources = Lists.newArrayList(); + for (ReadStream readStream : readSession.getStreamsList()) { + sources.add( + BigQueryStorageStreamSource.create( + readSession, readStream, trimmedSchema, parseFn, outputCoder, bqServices)); + } + return ImmutableList.copyOf(sources); + } + List streamBundle = Lists.newArrayList(); + List> sources = Lists.newArrayList(); for (ReadStream readStream : readSession.getStreamsList()) { + streamIndex++; + streamBundle.add(readStream); + if (streamIndex % streamsPerBundle == 0) { + sources.add( + BigQueryStorageStreamBundleSource.create( + readSession, streamBundle, trimmedSchema, parseFn, outputCoder, bqServices, 1L)); + streamBundle = Lists.newArrayList(); + } + } + if (streamIndex % streamsPerBundle != 0) { sources.add( - BigQueryStorageStreamSource.create( - readSession, readStream, trimmedSchema, parseFn, outputCoder, bqServices)); + BigQueryStorageStreamBundleSource.create( + readSession, streamBundle, trimmedSchema, parseFn, outputCoder, bqServices, 1L)); } - return ImmutableList.copyOf(sources); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamBundleSource.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamBundleSource.java new file mode 100644 index 000000000000..42e99b6aae38 --- /dev/null +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryStorageStreamBundleSource.java @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
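[Editorial aside, not part of the patch] When bundling is enabled, split() above sizes each StreamBundle from the session's estimated scan bytes, falls back to roughly a tenth of the stream count when no desired bundle size is given, and never exceeds the stream count. A worked example of that arithmetic, in a hypothetical StreamsPerBundleSketch class with illustrative numbers:

/** Worked example of the streams-per-bundle sizing used in split() when bundling is enabled. */
public class StreamsPerBundleSketch {
  static int streamsPerBundle(long desiredBundleSizeBytes, long estimatedTotalBytes, int streamCount) {
    int streamsPerBundle;
    if (desiredBundleSizeBytes > 0) {
      double bytesPerStream = (double) estimatedTotalBytes / streamCount;
      streamsPerBundle = (int) Math.ceil(desiredBundleSizeBytes / bytesPerStream);
    } else {
      streamsPerBundle = (int) Math.ceil((double) streamCount / 10);
    }
    return Math.min(streamCount, streamsPerBundle); // never ask for more streams than exist
  }

  public static void main(String[] args) {
    // 100 streams over ~10 GiB (~102 MiB per stream), targeting 256 MiB bundles:
    // ceil(256 MiB / ~102 MiB) = 3 streams per bundle.
    System.out.println(streamsPerBundle(256L << 20, 10L << 30, 100));
  }
}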
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.gcp.bigquery; + +import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.fromJsonString; +import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; + +import com.google.api.gax.rpc.ApiException; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.bigquery.storage.v1.ReadRowsRequest; +import com.google.cloud.bigquery.storage.v1.ReadRowsResponse; +import com.google.cloud.bigquery.storage.v1.ReadSession; +import com.google.cloud.bigquery.storage.v1.ReadStream; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import org.apache.beam.runners.core.metrics.ServiceCallMetric; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.io.OffsetBasedSource; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.BigQueryServerStream; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.util.Preconditions; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.checkerframework.checker.nullness.qual.RequiresNonNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link org.apache.beam.sdk.io.Source} representing a bundle of Streams in a BigQuery ReadAPI + * Session. This Source ONLY supports splitting at the StreamBundle level. + * + *
+ * <p>
{@link BigQueryStorageStreamBundleSource} defines a split-point as the starting offset of each + * Stream. As a result, the number of valid split points in the Source is equal to the number of + * Streams in the StreamBundle and this Source does NOT support sub-Stream splitting. + * + *
+ * <p>
Additionally, the underlying {@link org.apache.beam.sdk.io.range.OffsetRangeTracker} and + * {@link OffsetBasedSource} operate in the split point space and do NOT directly interact with the + * Streams constituting the StreamBundle. Consequently, fractional values used in + * `splitAtFraction()` are translated into StreamBundleIndices and the underlying RangeTracker + * handles the split operation by checking the validity of the split point. This has the following + * implications for the `splitAtFraction()` operation: + * + *
+ * <p>
1. Fraction values that point to the "middle" of a Stream will be translated to the + * appropriate Stream boundary by the RangeTracker. + * + *
+ * <p>
2. Once a Stream is being read from, the RangeTracker will only accept `splitAtFraction()` + * calls that point to StreamBundleIndices that are greater than the StreamBundleIndex of the + * current Stream + * + * @param Type of records represented by the source. + * @see OffsetBasedSource + * @see org.apache.beam.sdk.io.range.OffsetRangeTracker + * @see org.apache.beam.sdk.io.BlockBasedSource (semantically similar to {@link + * BigQueryStorageStreamBundleSource}) + */ +class BigQueryStorageStreamBundleSource extends OffsetBasedSource { + + public static BigQueryStorageStreamBundleSource create( + ReadSession readSession, + List streamBundle, + TableSchema tableSchema, + SerializableFunction parseFn, + Coder outputCoder, + BigQueryServices bqServices, + long minBundleSize) { + return new BigQueryStorageStreamBundleSource<>( + readSession, + streamBundle, + toJsonString(Preconditions.checkArgumentNotNull(tableSchema, "tableSchema")), + parseFn, + outputCoder, + bqServices, + minBundleSize); + } + + /** + * Creates a new source with the same properties as this one, except with a different {@link + * List}. + */ + public BigQueryStorageStreamBundleSource fromExisting(List newStreamBundle) { + return new BigQueryStorageStreamBundleSource<>( + readSession, + newStreamBundle, + jsonTableSchema, + parseFn, + outputCoder, + bqServices, + getMinBundleSize()); + } + + private final ReadSession readSession; + private final List streamBundle; + private final String jsonTableSchema; + private final SerializableFunction parseFn; + private final Coder outputCoder; + private final BigQueryServices bqServices; + + private BigQueryStorageStreamBundleSource( + ReadSession readSession, + List streamBundle, + String jsonTableSchema, + SerializableFunction parseFn, + Coder outputCoder, + BigQueryServices bqServices, + long minBundleSize) { + super(0, streamBundle.size(), minBundleSize); + this.readSession = Preconditions.checkArgumentNotNull(readSession, "readSession"); + this.streamBundle = Preconditions.checkArgumentNotNull(streamBundle, "streams"); + this.jsonTableSchema = Preconditions.checkArgumentNotNull(jsonTableSchema, "jsonTableSchema"); + this.parseFn = Preconditions.checkArgumentNotNull(parseFn, "parseFn"); + this.outputCoder = Preconditions.checkArgumentNotNull(outputCoder, "outputCoder"); + this.bqServices = Preconditions.checkArgumentNotNull(bqServices, "bqServices"); + } + + @Override + public Coder getOutputCoder() { + return outputCoder; + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder + .add(DisplayData.item("table", readSession.getTable()).withLabel("Table")) + .add(DisplayData.item("readSession", readSession.getName()).withLabel("Read session")); + for (ReadStream readStream : streamBundle) { + builder.add(DisplayData.item("stream", readStream.getName()).withLabel("Stream")); + } + } + + @Override + public long getEstimatedSizeBytes(PipelineOptions options) { + // The size of stream source can't be estimated due to server-side liquid sharding. + // TODO: Implement progress reporting. + return 0L; + } + + @Override + public List> split( + long desiredBundleSizeBytes, PipelineOptions options) { + // This method is only called for initial splits. Since this class will always be a child source + // of BigQueryStorageSourceBase, all splits here will be handled by `splitAtFraction()`. As a + // result, this is a no-op. 
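[Editorial aside, not part of the patch] The class javadoc above says a fraction that lands mid-Stream is moved to a Stream boundary by the RangeTracker. Assuming that translates to rounding the fractional offset up to the next bundle index (an assumption about OffsetRangeTracker behavior, not stated by the patch), the mapping looks like this hypothetical BundleSplitPointSketch:

/** Illustrative fraction-to-bundle-index mapping, under the rounding assumption above. */
public class BundleSplitPointSketch {
  static int splitIndexFor(double fraction, int bundleSize) {
    // Offsets run over [0, bundleSize); a fraction inside a stream moves to that stream's end.
    return (int) Math.ceil(fraction * bundleSize);
  }

  public static void main(String[] args) {
    int bundleSize = 4;
    System.out.println(splitIndexFor(0.5, bundleSize)); // 2: exactly on a stream boundary
    System.out.println(splitIndexFor(0.6, bundleSize)); // 3: 0.6 falls inside stream 2, rounded up
  }
}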
+ return ImmutableList.of(this); + } + + @Override + public long getMaxEndOffset(PipelineOptions options) throws Exception { + return this.streamBundle.size(); + } + + @Override + public OffsetBasedSource createSourceForSubrange(long start, long end) { + List newStreamBundle = streamBundle.subList((int) start, (int) end); + return fromExisting(newStreamBundle); + } + + @Override + public BigQueryStorageStreamBundleReader createReader(PipelineOptions options) + throws IOException { + return new BigQueryStorageStreamBundleReader<>(this, options.as(BigQueryOptions.class)); + } + + public static class BigQueryStorageStreamBundleReader extends OffsetBasedReader { + private static final Logger LOG = + LoggerFactory.getLogger(BigQueryStorageStreamBundleReader.class); + + private final BigQueryStorageReader reader; + private final SerializableFunction parseFn; + private final StorageClient storageClient; + private final TableSchema tableSchema; + + private BigQueryStorageStreamBundleSource source; + private @Nullable BigQueryServerStream responseStream = null; + private @Nullable Iterator responseIterator = null; + private @Nullable T current = null; + private int currentStreamBundleIndex; + private long currentStreamOffset; + + // Values used for progress reporting. + private double fractionOfStreamBundleConsumed; + + private double progressAtResponseStart; + private double progressAtResponseEnd; + private long rowsConsumedFromCurrentResponse; + private long totalRowsInCurrentResponse; + + private @Nullable TableReference tableReference; + private @Nullable ServiceCallMetric serviceCallMetric; + + private BigQueryStorageStreamBundleReader( + BigQueryStorageStreamBundleSource source, BigQueryOptions options) throws IOException { + super(source); + this.source = source; + this.reader = BigQueryStorageReaderFactory.getReader(source.readSession); + this.parseFn = source.parseFn; + this.storageClient = source.bqServices.getStorageClient(options); + this.tableSchema = fromJsonString(source.jsonTableSchema, TableSchema.class); + this.currentStreamBundleIndex = 0; + this.fractionOfStreamBundleConsumed = 0d; + this.progressAtResponseStart = 0d; + this.progressAtResponseEnd = 0d; + this.rowsConsumedFromCurrentResponse = 0L; + this.totalRowsInCurrentResponse = 0L; + } + + @Override + public T getCurrent() throws NoSuchElementException { + if (current == null) { + throw new NoSuchElementException(); + } + return current; + } + + @Override + protected long getCurrentOffset() throws NoSuchElementException { + return currentStreamBundleIndex; + } + + @Override + protected boolean isAtSplitPoint() throws NoSuchElementException { + if (currentStreamOffset == 0) { + return true; + } + return false; + } + + @Override + public boolean startImpl() throws IOException { + return readNextStream(); + } + + @Override + public boolean advanceImpl() throws IOException { + Preconditions.checkStateNotNull(responseIterator); + currentStreamOffset += totalRowsInCurrentResponse; + return readNextRecord(); + } + + private boolean readNextStream() throws IOException { + BigQueryStorageStreamBundleSource source = getCurrentSource(); + if (currentStreamBundleIndex == source.streamBundle.size()) { + fractionOfStreamBundleConsumed = 1d; + return false; + } + ReadRowsRequest request = + ReadRowsRequest.newBuilder() + .setReadStream(source.streamBundle.get(currentStreamBundleIndex).getName()) + .build(); + tableReference = BigQueryUtils.toTableReference(source.readSession.getTable()); + serviceCallMetric = 
BigQueryUtils.readCallMetric(tableReference); + LOG.info( + "Started BigQuery Storage API read from stream {}.", + source.streamBundle.get(currentStreamBundleIndex).getName()); + responseStream = storageClient.readRows(request, source.readSession.getTable()); + responseIterator = responseStream.iterator(); + return readNextRecord(); + } + + @RequiresNonNull("responseIterator") + private boolean readNextRecord() throws IOException { + Iterator responseIterator = this.responseIterator; + if (responseIterator == null) { + LOG.info("Received null responseIterator for stream {}", currentStreamBundleIndex); + return false; + } + while (reader.readyForNextReadResponse()) { + if (!responseIterator.hasNext()) { + synchronized (this) { + currentStreamOffset = 0; + currentStreamBundleIndex++; + } + return readNextStream(); + } + + ReadRowsResponse response; + try { + response = responseIterator.next(); + // Since we don't have a direct hook to the underlying + // API call, record success every time we read a record successfully. + if (serviceCallMetric != null) { + serviceCallMetric.call("ok"); + } + } catch (ApiException e) { + // Occasionally the iterator will fail and raise an exception. + // Capture it here and record the error in the metric. + if (serviceCallMetric != null) { + serviceCallMetric.call(e.getStatusCode().getCode().name()); + } + throw e; + } + + progressAtResponseStart = response.getStats().getProgress().getAtResponseStart(); + progressAtResponseEnd = response.getStats().getProgress().getAtResponseEnd(); + totalRowsInCurrentResponse = response.getRowCount(); + rowsConsumedFromCurrentResponse = 0L; + + checkArgument( + totalRowsInCurrentResponse >= 0, + "Row count from current response (%s) must be non-negative.", + totalRowsInCurrentResponse); + + checkArgument( + 0f <= progressAtResponseStart && progressAtResponseStart <= 1f, + "Progress at response start (%s) is not in the range [0.0, 1.0].", + progressAtResponseStart); + + checkArgument( + 0f <= progressAtResponseEnd && progressAtResponseEnd <= 1f, + "Progress at response end (%s) is not in the range [0.0, 1.0].", + progressAtResponseEnd); + reader.processReadRowsResponse(response); + } + + SchemaAndRecord schemaAndRecord = new SchemaAndRecord(reader.readSingleRecord(), tableSchema); + + current = parseFn.apply(schemaAndRecord); + + // Calculates the fraction of the current stream that has been consumed. This value is + // calculated by interpolating between the fraction consumed value from the previous server + // response (or zero if we're consuming the first response) and the fractional value in the + // current response based on how many of the rows in the current response have been consumed. + rowsConsumedFromCurrentResponse++; + + double fractionOfCurrentStreamConsumed = + progressAtResponseStart + + ((progressAtResponseEnd - progressAtResponseStart) + * (rowsConsumedFromCurrentResponse * 1.0 / totalRowsInCurrentResponse)); + + // We now calculate the progress made over the entire StreamBundle by assuming that each + // Stream in the StreamBundle has approximately the same amount of data. Given this, merely + // counting the number of Streams that have been read and linearly interpolating with the + // progress made in the current Stream gives us the overall StreamBundle progress. 
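[Editorial aside, not part of the patch] The comment above describes a two-level interpolation: progress within the current Stream is interpolated from the server-reported response progress, then progress across the bundle assumes each Stream carries roughly the same amount of data. A worked version of that arithmetic, in a hypothetical ProgressInterpolationSketch class:

/** Worked example of the two-level progress interpolation described above. */
public class ProgressInterpolationSketch {
  static double fractionConsumed(
      int currentStreamIndex,
      int bundleSize,
      double progressAtResponseStart,
      double progressAtResponseEnd,
      long rowsConsumed,
      long totalRowsInResponse) {
    double fractionOfCurrentStream =
        progressAtResponseStart
            + (progressAtResponseEnd - progressAtResponseStart)
                * ((double) rowsConsumed / totalRowsInResponse);
    // Assume every stream in the bundle carries roughly the same amount of data.
    return (currentStreamIndex + fractionOfCurrentStream) / bundleSize;
  }

  public static void main(String[] args) {
    // Second stream (index 1) of a 4-stream bundle, halfway through a response spanning
    // server-side progress 0.2 -> 0.4: stream progress = 0.3, bundle progress = (1 + 0.3) / 4 = 0.325.
    System.out.println(fractionConsumed(1, 4, 0.2, 0.4, 50, 100));
  }
}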
+ fractionOfStreamBundleConsumed = + (currentStreamBundleIndex + fractionOfCurrentStreamConsumed) / source.streamBundle.size(); + return true; + } + + @Override + public synchronized void close() { + // Because superclass cannot have preconditions around these variables, cannot use + // @RequiresNonNull + Preconditions.checkStateNotNull(storageClient); + Preconditions.checkStateNotNull(reader); + storageClient.close(); + reader.close(); + } + + @Override + public synchronized BigQueryStorageStreamBundleSource getCurrentSource() { + return source; + } + + @Override + public synchronized Double getFractionConsumed() { + return fractionOfStreamBundleConsumed; + } + } +} diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java index 2d274ed0e511..9d24246405ac 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java @@ -23,7 +23,7 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicReference; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.transforms.SerializableFunction; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSourceDef.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSourceDef.java index e78ea0b5d7c4..9b77f6a7ef9d 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSourceDef.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSourceDef.java @@ -27,7 +27,7 @@ import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.AvroSource; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService; import org.apache.beam.sdk.options.ValueProvider; import org.apache.beam.sdk.schemas.Schema; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiDynamicDestinationsGenericRecord.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiDynamicDestinationsGenericRecord.java index 020ea70df539..98684db558bb 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiDynamicDestinationsGenericRecord.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiDynamicDestinationsGenericRecord.java @@ -23,8 +23,8 @@ import com.google.protobuf.Message; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import 
org.apache.beam.sdk.transforms.SerializableFunction; import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java index 95a8027516d1..49f6d436594a 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java @@ -39,6 +39,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.math.RoundingMode; +import java.time.DateTimeException; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -80,11 +81,32 @@ public class TableRowToStorageApiProto { private static final DateTimeFormatter DATETIME_SPACE_FORMATTER = new DateTimeFormatterBuilder() .append(DateTimeFormatter.ISO_LOCAL_DATE) + .optionalStart() .appendLiteral(' ') + .optionalEnd() + .optionalStart() + .appendLiteral('T') + .optionalEnd() .append(DateTimeFormatter.ISO_LOCAL_TIME) .toFormatter() .withZone(ZoneOffset.UTC); + private static final DateTimeFormatter TIMESTAMP_FORMATTER = + new DateTimeFormatterBuilder() + // 'yyyy-MM-dd(T| )HH:mm:ss.SSSSSSSSS' + .append(DATETIME_SPACE_FORMATTER) + // 'yyyy-MM-dd(T| )HH:mm:ss.SSSSSSSSS(+HH:MM:ss|Z)' + .optionalStart() + .appendOffsetId() + .optionalEnd() + // 'yyyy-MM-dd(T| )HH:mm:ss.SSSSSSSSS [time_zone]', time_zone -> UTC, Asia/Kolkata, etc + // if both an offset and a time zone are provided, the offset takes precedence + .optionalStart() + .appendLiteral(' ') + .parseCaseSensitive() + .appendZoneRegionId() + .toFormatter(); + abstract static class SchemaConversionException extends Exception { SchemaConversionException(String msg) { super(msg); @@ -737,18 +759,21 @@ private static void fieldDescriptorFromTableField( case TIMESTAMP: if (value instanceof String) { try { - // '2011-12-03T10:15:30+01:00' '2011-12-03T10:15:30' + // '2011-12-03T10:15:30Z', '2011-12-03 10:15:30+05:00' + // '2011-12-03 10:15:30 UTC', '2011-12-03T10:15:30 America/New_York' return ChronoUnit.MICROS.between( - Instant.EPOCH, Instant.from(DateTimeFormatter.ISO_DATE_TIME.parse((String) value))); - } catch (DateTimeParseException e) { + Instant.EPOCH, Instant.from(TIMESTAMP_FORMATTER.parse((String) value))); + } catch (DateTimeException e) { try { + // for backwards compatibility, default time zone is UTC for values with no time-zone + // '2011-12-03T10:15:30' + return ChronoUnit.MICROS.between( + Instant.EPOCH, + Instant.from(TIMESTAMP_FORMATTER.withZone(ZoneOffset.UTC).parse((String) value))); + } catch (DateTimeParseException err) { // "12345667" return ChronoUnit.MICROS.between( Instant.EPOCH, Instant.ofEpochMilli(Long.parseLong((String) value))); - } catch (NumberFormatException e2) { - // "yyyy-MM-dd HH:mm:ss.SSSSSS" - return ChronoUnit.MICROS.between( - Instant.EPOCH, Instant.from(DATETIME_SPACE_FORMATTER.parse((String) value))); } } } else if (value instanceof Instant) { diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java 
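[Editorial aside, not part of the patch] The TIMESTAMP_FORMATTER change above widens TableRowToStorageApiProto to accept either a 'T' or a space separator, an optional offset, and an optional trailing zone region, with a UTC fallback for zone-less strings. A standalone sketch mirroring that parsing chain, in a hypothetical TimestampParsingSketch class (the formatter is adapted from the patch, not copied verbatim):

import java.time.DateTimeException;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoUnit;

/** Sketch of the widened BigQuery TIMESTAMP string parsing. */
public class TimestampParsingSketch {
  private static final DateTimeFormatter TIMESTAMP_FORMATTER =
      new DateTimeFormatterBuilder()
          .append(DateTimeFormatter.ISO_LOCAL_DATE)
          .optionalStart().appendLiteral(' ').optionalEnd() // 'yyyy-MM-dd HH:mm:ss'
          .optionalStart().appendLiteral('T').optionalEnd() // or 'yyyy-MM-ddTHH:mm:ss'
          .append(DateTimeFormatter.ISO_LOCAL_TIME)
          .optionalStart().appendOffsetId().optionalEnd() // optional '+05:00' or 'Z'
          .optionalStart().appendLiteral(' ').parseCaseSensitive().appendZoneRegionId().optionalEnd()
          .toFormatter();

  static long epochMicros(String value) {
    try {
      return ChronoUnit.MICROS.between(Instant.EPOCH, Instant.from(TIMESTAMP_FORMATTER.parse(value)));
    } catch (DateTimeException e) {
      // Zone-less values default to UTC, matching the backwards-compatible fallback in the patch.
      return ChronoUnit.MICROS.between(
          Instant.EPOCH, Instant.from(TIMESTAMP_FORMATTER.withZone(ZoneOffset.UTC).parse(value)));
    }
  }

  public static void main(String[] args) {
    System.out.println(epochMicros("2011-12-03T10:15:30Z"));
    System.out.println(epochMicros("2011-12-03 10:15:30+05:00"));
    System.out.println(epochMicros("2011-12-03 10:15:30 America/New_York"));
    System.out.println(epochMicros("2011-12-03T10:15:30")); // no zone -> UTC fallback
  }
}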
index b151818a18b1..1e27bf98bece 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java @@ -336,7 +336,8 @@ BigQueryIO.Write createStorageWriteApiTransform() { .to(configuration.getTable()) .withMethod(writeMethod) .useBeamSchema() - .withFormatFunction(BigQueryUtils.toTableRow()); + .withFormatFunction(BigQueryUtils.toTableRow()) + .withWriteDisposition(WriteDisposition.WRITE_APPEND); if (!Strings.isNullOrEmpty(configuration.getCreateDisposition())) { CreateDisposition createDisposition = diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java index e864e649e699..e4de1680d5ad 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java @@ -31,8 +31,8 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ThreadLocalRandom; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Objects; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java index a95b456aa52b..7bac875e2375 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java @@ -40,10 +40,11 @@ import org.apache.beam.sdk.PipelineRunner; import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; import org.apache.beam.sdk.extensions.protobuf.ProtoDomain; import org.apache.beam.sdk.extensions.protobuf.ProtoDynamicMessageSchema; @@ -55,7 +56,6 @@ import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.SchemaCoder; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java index 263c3fb95005..21885773e8aa 100644 
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.List; import java.util.Objects; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; import org.apache.beam.sdk.schemas.AutoValueSchema; import org.apache.beam.sdk.schemas.Schema; @@ -33,7 +34,6 @@ import org.apache.beam.sdk.schemas.transforms.SchemaTransform; import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.utils.JsonUtils; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java index d2d510204ca7..bd083f8892af 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java @@ -29,13 +29,13 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.AutoValueSchema; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; import org.apache.beam.sdk.schemas.transforms.SchemaTransform; import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.utils.JsonUtils; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChangeStreamRecordMetadata.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChangeStreamRecordMetadata.java index af145b282a1e..22c295550b79 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChangeStreamRecordMetadata.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChangeStreamRecordMetadata.java @@ -22,8 +22,8 @@ import java.util.Objects; import javax.annotation.Nullable; import org.apache.avro.reflect.AvroEncode; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.gcp.spanner.changestreams.encoder.TimestampEncoding; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartition.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartition.java index a58434e8770b..84ab846712c1 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartition.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartition.java @@ -21,8 +21,8 @@ import java.util.HashSet; import java.util.Objects; import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; /** diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartitionsRecord.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartitionsRecord.java index 7384f6850b96..2dd9d92682b2 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartitionsRecord.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ChildPartitionsRecord.java @@ -22,8 +22,8 @@ import java.util.Objects; import org.apache.avro.reflect.AvroEncode; import org.apache.avro.reflect.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.gcp.spanner.changestreams.encoder.TimestampEncoding; /** diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ColumnType.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ColumnType.java index fd27934b5638..c54036ff3915 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ColumnType.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ColumnType.java @@ -20,8 +20,8 @@ import java.io.Serializable; import java.util.Objects; import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.schemas.annotations.SchemaCreate; /** diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/DataChangeRecord.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/DataChangeRecord.java index 837e334755c3..e00ef9c08ca1 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/DataChangeRecord.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/DataChangeRecord.java @@ -22,8 +22,8 @@ import java.util.Objects; import org.apache.avro.reflect.AvroEncode; import org.apache.avro.reflect.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import 
org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.gcp.spanner.changestreams.encoder.TimestampEncoding; /** diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/HeartbeatRecord.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/HeartbeatRecord.java index 3f8b88afd332..d343dabc69a9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/HeartbeatRecord.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/HeartbeatRecord.java @@ -21,8 +21,8 @@ import java.util.Objects; import org.apache.avro.reflect.AvroEncode; import org.apache.avro.reflect.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.gcp.spanner.changestreams.encoder.TimestampEncoding; /** diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/Mod.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/Mod.java index 60e62f02eb6e..37e80877b2f3 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/Mod.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/Mod.java @@ -20,8 +20,8 @@ import java.io.Serializable; import java.util.Objects; import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** * Represents a modification in a table emitted within a {@link DataChangeRecord}. Each mod contains diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ModType.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ModType.java index e56dd0af6d65..719e2e7c27f9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ModType.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ModType.java @@ -17,8 +17,8 @@ */ package org.apache.beam.sdk.io.gcp.spanner.changestreams.model; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** * Represents the type of modification applied in the {@link DataChangeRecord}. 
It can be one of the diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/PartitionMetadata.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/PartitionMetadata.java index a32c832b84d0..765675d34743 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/PartitionMetadata.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/PartitionMetadata.java @@ -24,8 +24,8 @@ import java.util.Objects; import javax.annotation.Nullable; import org.apache.avro.reflect.AvroEncode; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.gcp.spanner.changestreams.encoder.TimestampEncoding; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/TypeCode.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/TypeCode.java index df4f520f3738..49498bded2ee 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/TypeCode.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/TypeCode.java @@ -20,8 +20,8 @@ import java.io.Serializable; import java.util.Objects; import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** * Represents a type of a column within Cloud Spanner. The type itself is encoded in a String code. diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ValueCaptureType.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ValueCaptureType.java index 6a55bdc72d2b..9e336e18eb7b 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ValueCaptureType.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/model/ValueCaptureType.java @@ -17,8 +17,8 @@ */ package org.apache.beam.sdk.io.gcp.spanner.changestreams.model; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; /** * Represents the capture type of a change stream. 
The only supported value at the moment is diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProtoTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProtoTest.java index 94c58e414251..8b32e1b054df 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProtoTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProtoTest.java @@ -41,7 +41,7 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; -import org.apache.beam.sdk.schemas.utils.AvroUtils.TypeWithNullability; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Functions; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; @@ -365,8 +365,8 @@ void validateDescriptorAgainstSchema(Schema originalSchema, DescriptorProto sche .getFieldList() .forEach( p -> { - TypeWithNullability fieldSchema = - TypeWithNullability.create( + AvroUtils.TypeWithNullability fieldSchema = + AvroUtils.TypeWithNullability.create( originalSchema.getField(nameMapping.get(p.getName())).schema()); Label label = fieldSchema.getType().getType() == Schema.Type.ARRAY diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java index f24796837e6d..1a1a67998a14 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java @@ -39,8 +39,8 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.reflect.Nullable; import org.apache.avro.util.Utf8; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java index 4ca0fa96b5bd..01f58880527b 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOReadTest.java @@ -49,9 +49,9 @@ import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.SerializableCoder; import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.extensions.avro.io.AvroSource; import org.apache.beam.sdk.extensions.protobuf.ByteStringCoder; import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; -import org.apache.beam.sdk.io.AvroSource; import 
org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.QueryPriority; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.JobType; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java index d5dcee095c6a..491c5ada4f3a 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadTest.java @@ -84,9 +84,9 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.CoderRegistry; import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.protobuf.ByteStringCoder; import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; import org.apache.beam.sdk.io.BoundedSource; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadWithStreamBundleSourceTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadWithStreamBundleSourceTest.java new file mode 100644 index 000000000000..fc1ccd3c8914 --- /dev/null +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageReadWithStreamBundleSourceTest.java @@ -0,0 +1,2156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.io.gcp.bigquery; + +import static java.util.Arrays.asList; +import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.withSettings; + +import com.google.api.services.bigquery.model.Streamingbuffer; +import com.google.api.services.bigquery.model.Table; +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.bigquery.storage.v1.ArrowRecordBatch; +import com.google.cloud.bigquery.storage.v1.ArrowSchema; +import com.google.cloud.bigquery.storage.v1.AvroRows; +import com.google.cloud.bigquery.storage.v1.AvroSchema; +import com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest; +import com.google.cloud.bigquery.storage.v1.DataFormat; +import com.google.cloud.bigquery.storage.v1.ReadRowsRequest; +import com.google.cloud.bigquery.storage.v1.ReadRowsResponse; +import com.google.cloud.bigquery.storage.v1.ReadSession; +import com.google.cloud.bigquery.storage.v1.ReadStream; +import com.google.cloud.bigquery.storage.v1.StreamStats; +import com.google.cloud.bigquery.storage.v1.StreamStats.Progress; +import com.google.protobuf.ByteString; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigInteger; +import java.nio.channels.Channels; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.VectorUnloader; +import org.apache.arrow.vector.ipc.WriteChannel; +import org.apache.arrow.vector.ipc.message.MessageSerializer; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.util.Text; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData.Record; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.extensions.protobuf.ByteStringCoder; +import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; +import org.apache.beam.sdk.io.BoundedSource; +import org.apache.beam.sdk.io.BoundedSource.BoundedReader; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TableRowParser; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient; +import org.apache.beam.sdk.io.gcp.bigquery.BigQueryStorageStreamBundleSource.BigQueryStorageStreamBundleReader; 
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils.ConversionOptions; +import org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices; +import org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices.FakeBigQueryServerStream; +import org.apache.beam.sdk.io.gcp.testing.FakeDatasetService; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.beam.sdk.schemas.FieldAccessDescriptor; +import org.apache.beam.sdk.schemas.SchemaCoder; +import org.apache.beam.sdk.schemas.transforms.Convert; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TupleTag; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.junit.runners.model.Statement; + +/** + * Tests for {@link BigQueryIO#readTableRows()} using {@link Method#DIRECT_READ} and {@link + * BigQueryOptions#setEnableBundling(Boolean)} set to true. + */ +@RunWith(JUnit4.class) +public class BigQueryIOStorageReadWithStreamBundleSourceTest { + + private transient PipelineOptions options; + private final transient TemporaryFolder testFolder = new TemporaryFolder(); + private transient TestPipeline p; + private BufferAllocator allocator; + + @Rule + public final transient TestRule folderThenPipeline = + new TestRule() { + @Override + public Statement apply(Statement base, Description description) { + // We need to set up the temporary folder, and then set up the TestPipeline based on the + // chosen folder. Unfortunately, since rule evaluation order is unspecified and unrelated + // to field order, and is separate from construction, that requires manually creating this + // TestRule.
+ Statement withPipeline = + new Statement() { + @Override + public void evaluate() throws Throwable { + options = TestPipeline.testingPipelineOptions(); + options.as(BigQueryOptions.class).setProject("project-id"); + if (description.getAnnotations().stream() + .anyMatch(a -> a.annotationType().equals(ProjectOverride.class))) { + options.as(BigQueryOptions.class).setBigQueryProject("bigquery-project-id"); + } + options + .as(BigQueryOptions.class) + .setTempLocation(testFolder.getRoot().getAbsolutePath()); + options.as(BigQueryOptions.class).setEnableBundling(true); + p = TestPipeline.fromOptions(options); + p.apply(base, description).evaluate(); + } + }; + return testFolder.apply(withPipeline, description); + } + }; + + @Rule public transient ExpectedException thrown = ExpectedException.none(); + + private final FakeDatasetService fakeDatasetService = new FakeDatasetService(); + + @Before + public void setUp() throws Exception { + FakeDatasetService.setUp(); + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @After + public void teardown() { + allocator.close(); + } + + @Test + public void testBuildTableBasedSource() { + BigQueryIO.TypedRead typedRead = + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from("foo.com:project:dataset.table"); + checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table"); + assertTrue(typedRead.getValidate()); + } + + @Test + public void testBuildTableBasedSourceWithoutValidation() { + BigQueryIO.TypedRead typedRead = + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from("foo.com:project:dataset.table") + .withoutValidation(); + checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table"); + assertFalse(typedRead.getValidate()); + } + + @Test + public void testBuildTableBasedSourceWithDefaultProject() { + BigQueryIO.TypedRead typedRead = + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from("myDataset.myTable"); + checkTypedReadTableObject(typedRead, null, "myDataset", "myTable"); + } + + @Test + public void testBuildTableBasedSourceWithTableReference() { + TableReference tableReference = + new TableReference() + .setProjectId("foo.com:project") + .setDatasetId("dataset") + .setTableId("table"); + BigQueryIO.TypedRead typedRead = + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from(tableReference); + checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table"); + } + + private void checkTypedReadTableObject( + TypedRead typedRead, String project, String dataset, String table) { + assertEquals(project, typedRead.getTable().getProjectId()); + assertEquals(dataset, typedRead.getTable().getDatasetId()); + assertEquals(table, typedRead.getTable().getTableId()); + assertNull(typedRead.getQuery()); + assertEquals(Method.DIRECT_READ, typedRead.getMethod()); + } + + @Test + public void testBuildSourceWithTableAndFlatten() { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage( + "Invalid BigQueryIO.Read: Specifies a table with a result flattening preference," + + " which only applies to queries"); + p.apply( + "ReadMyTable", + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from("foo.com:project:dataset.table") + .withoutResultFlattening()); + p.run(); + } + + @Test + public void 
testBuildSourceWithTableAndSqlDialect() { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage( + "Invalid BigQueryIO.Read: Specifies a table with a SQL dialect preference," + + " which only applies to queries"); + p.apply( + "ReadMyTable", + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from("foo.com:project:dataset.table") + .usingStandardSql()); + p.run(); + } + + @Test + public void testDisplayData() { + String tableSpec = "foo.com:project:dataset.table"; + BigQueryIO.TypedRead typedRead = + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .withSelectedFields(ImmutableList.of("foo", "bar")) + .withProjectionPushdownApplied() + .from(tableSpec); + DisplayData displayData = DisplayData.from(typedRead); + assertThat(displayData, hasDisplayItem("table", tableSpec)); + assertThat(displayData, hasDisplayItem("selectedFields", "foo, bar")); + assertThat(displayData, hasDisplayItem("projectionPushdownApplied", true)); + } + + @Test + public void testName() { + assertEquals( + "BigQueryIO.TypedRead", + BigQueryIO.read(new TableRowParser()) + .withCoder(TableRowJsonCoder.of()) + .withMethod(Method.DIRECT_READ) + .from("foo.com:project:dataset.table") + .getName()); + } + + @Test + public void testCoderInference() { + // Lambdas erase too much type information -- use an anonymous class here. + SerializableFunction> parseFn = + new SerializableFunction>() { + @Override + public KV apply(SchemaAndRecord input) { + return null; + } + }; + + assertEquals( + KvCoder.of(ByteStringCoder.of(), ProtoCoder.of(ReadSession.class)), + BigQueryIO.read(parseFn).inferCoder(CoderRegistry.createDefault())); + } + + @Test + public void testTableSourceEstimatedSize() throws Exception { + doTableSourceEstimatedSizeTest(false); + } + + @Test + public void testTableSourceEstimatedSize_IgnoresStreamingBuffer() throws Exception { + doTableSourceEstimatedSizeTest(true); + } + + private void doTableSourceEstimatedSizeTest(boolean useStreamingBuffer) throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + Table table = new Table().setTableReference(tableRef).setNumBytes(100L); + if (useStreamingBuffer) { + table.setStreamingBuffer(new Streamingbuffer().setEstimatedBytes(BigInteger.TEN)); + } + + fakeDatasetService.createTable(table); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(tableRef), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withDatasetService(fakeDatasetService)); + + assertEquals(100, tableSource.getEstimatedSizeBytes(options)); + } + + @Test + @ProjectOverride + public void testTableSourceEstimatedSize_WithBigQueryProject() throws Exception { + fakeDatasetService.createDataset("bigquery-project-id", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("bigquery-project-id:dataset.table"); + Table table = new Table().setTableReference(tableRef).setNumBytes(100L); + fakeDatasetService.createTable(table); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(BigQueryHelpers.parseTableSpec("dataset.table")), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + new 
FakeBigQueryServices().withDatasetService(fakeDatasetService)); + + assertEquals(100, tableSource.getEstimatedSizeBytes(options)); + } + + @Test + public void testTableSourceEstimatedSize_WithDefaultProject() throws Exception { + fakeDatasetService.createDataset("project-id", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("project-id:dataset.table"); + Table table = new Table().setTableReference(tableRef).setNumBytes(100L); + fakeDatasetService.createTable(table); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(BigQueryHelpers.parseTableSpec("dataset.table")), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withDatasetService(fakeDatasetService)); + + assertEquals(100, tableSource.getEstimatedSizeBytes(options)); + } + + private static final String AVRO_SCHEMA_STRING = + "{\"namespace\": \"example.avro\",\n" + + " \"type\": \"record\",\n" + + " \"name\": \"RowRecord\",\n" + + " \"fields\": [\n" + + " {\"name\": \"name\", \"type\": \"string\"},\n" + + " {\"name\": \"number\", \"type\": \"long\"}\n" + + " ]\n" + + "}"; + + private static final Schema AVRO_SCHEMA = new Schema.Parser().parse(AVRO_SCHEMA_STRING); + + private static final String TRIMMED_AVRO_SCHEMA_STRING = + "{\"namespace\": \"example.avro\",\n" + + "\"type\": \"record\",\n" + + "\"name\": \"RowRecord\",\n" + + "\"fields\": [\n" + + " {\"name\": \"name\", \"type\": \"string\"}\n" + + " ]\n" + + "}"; + + private static final Schema TRIMMED_AVRO_SCHEMA = + new Schema.Parser().parse(TRIMMED_AVRO_SCHEMA_STRING); + + private static final TableSchema TABLE_SCHEMA = + new TableSchema() + .setFields( + ImmutableList.of( + new TableFieldSchema().setName("name").setType("STRING").setMode("REQUIRED"), + new TableFieldSchema().setName("number").setType("INTEGER").setMode("REQUIRED"))); + + private static final org.apache.arrow.vector.types.pojo.Schema ARROW_SCHEMA = + new org.apache.arrow.vector.types.pojo.Schema( + asList( + field("name", new ArrowType.Utf8()), field("number", new ArrowType.Int(64, true)))); + + private void doTableSourceInitialSplitTest(long bundleSize, long tableSize, int streamCount) + throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + + Table table = + new Table().setTableReference(tableRef).setNumBytes(tableSize).setSchema(TABLE_SCHEMA); + + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table")) + .setMaxStreamCount(0) + .build(); + + ReadSession.Builder builder = + ReadSession.newBuilder() + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .setDataFormat(DataFormat.AVRO) + .setEstimatedTotalBytesScanned(tableSize); + for (int i = 0; i < streamCount; i++) { + builder.addStreams(ReadStream.newBuilder().setName("stream-" + i)); + } + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(builder.build()); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(tableRef), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + 
new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient)); + + List> sources = tableSource.split(bundleSize, options); + // Each StreamBundle is expected to contain a single stream. + assertEquals(streamCount, sources.size()); + } + + @Test + public void testTableSourceInitialSplit() throws Exception { + doTableSourceInitialSplitTest(1024L, 1024L * 1024L, 1024); + } + + @Test + public void testTableSourceInitialSplit_MinSplitCount() throws Exception { + doTableSourceInitialSplitTest(1024L, 1024L * 1024L, 10); + } + + @Test + public void testTableSourceInitialSplit_MaxSplitCount() throws Exception { + doTableSourceInitialSplitTest(10L, 1024L * 1024L, 10_000); + } + + @Test + public void testTableSourceInitialSplit_WithSelectedFieldsAndRowRestriction() throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + + Table table = new Table().setTableReference(tableRef).setNumBytes(200L).setSchema(TABLE_SCHEMA); + + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table") + .setReadOptions( + ReadSession.TableReadOptions.newBuilder() + .addSelectedFields("name") + .setRowRestriction("number > 5"))) + .setMaxStreamCount(0) + .build(); + + ReadSession.Builder builder = + ReadSession.newBuilder() + .setAvroSchema(AvroSchema.newBuilder().setSchema(TRIMMED_AVRO_SCHEMA_STRING)) + .setDataFormat(DataFormat.AVRO) + .setEstimatedTotalBytesScanned(100L); + for (int i = 0; i < 10; i++) { + builder.addStreams(ReadStream.newBuilder().setName("stream-" + i)); + } + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(builder.build()); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(tableRef), + StaticValueProvider.of(Lists.newArrayList("name")), + StaticValueProvider.of("number > 5"), + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient)); + + List> sources = tableSource.split(20L, options); + assertEquals(5, sources.size()); + } + + @Test + public void testTableSourceInitialSplit_WithDefaultProject() throws Exception { + fakeDatasetService.createDataset("project-id", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("project-id:dataset.table"); + + Table table = + new Table().setTableReference(tableRef).setNumBytes(1024L).setSchema(TABLE_SCHEMA); + + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/project-id/datasets/dataset/tables/table")) + .setMaxStreamCount(0) + .build(); + + ReadSession.Builder builder = + ReadSession.newBuilder() + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .setDataFormat(DataFormat.AVRO) + .setEstimatedTotalBytesScanned(1024L); + for (int i = 0; i < 50; i++) { + builder.addStreams(ReadStream.newBuilder().setName("stream-" + i)); + } + + StorageClient fakeStorageClient = 
mock(StorageClient.class); + when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(builder.build()); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(BigQueryHelpers.parseTableSpec("dataset.table")), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient)); + + List> sources = tableSource.split(4096L, options); + // A single StreamBundle containing all the Streams. + assertEquals(1, sources.size()); + } + + @Test + public void testTableSourceInitialSplit_EmptyTable() throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + + Table table = + new Table() + .setTableReference(tableRef) + .setNumBytes(1024L * 1024L) + .setSchema(new TableSchema()); + + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table")) + .setMaxStreamCount(0) + .build(); + + ReadSession emptyReadSession = ReadSession.newBuilder().build(); + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(emptyReadSession); + + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of(tableRef), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient)); + + List> sources = tableSource.split(1024L, options); + assertTrue(sources.isEmpty()); + } + + @Test + public void testTableSourceCreateReader() throws Exception { + BigQueryStorageTableSource tableSource = + BigQueryStorageTableSource.create( + ValueProvider.StaticValueProvider.of( + BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table")), + null, + null, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withDatasetService(fakeDatasetService)); + + thrown.expect(UnsupportedOperationException.class); + thrown.expectMessage("BigQuery storage source must be split before reading"); + tableSource.createReader(options); + } + + private static GenericRecord createRecord(String name, Schema schema) { + GenericRecord genericRecord = new Record(schema); + genericRecord.put("name", name); + return genericRecord; + } + + private static GenericRecord createRecord(String name, long number, Schema schema) { + GenericRecord genericRecord = new Record(schema); + genericRecord.put("name", name); + genericRecord.put("number", number); + return genericRecord; + } + + private static ByteString serializeArrowSchema( + org.apache.arrow.vector.types.pojo.Schema arrowSchema) { + ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream(); + try { + MessageSerializer.serialize( + new WriteChannel(Channels.newChannel(byteOutputStream)), arrowSchema); + } catch (IOException ex) { + throw new RuntimeException("Failed to serialize arrow schema.", ex); + } + return ByteString.copyFrom(byteOutputStream.toByteArray()); + } + + private static final EncoderFactory ENCODER_FACTORY = EncoderFactory.get(); + + private static ReadRowsResponse createResponse( + 
Schema schema, + Collection genericRecords, + double progressAtResponseStart, + double progressAtResponseEnd) + throws Exception { + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Encoder binaryEncoder = ENCODER_FACTORY.binaryEncoder(outputStream, null); + for (GenericRecord genericRecord : genericRecords) { + writer.write(genericRecord, binaryEncoder); + } + + binaryEncoder.flush(); + + return ReadRowsResponse.newBuilder() + .setAvroRows( + AvroRows.newBuilder() + .setSerializedBinaryRows(ByteString.copyFrom(outputStream.toByteArray())) + .setRowCount(genericRecords.size())) + .setRowCount(genericRecords.size()) + .setStats( + StreamStats.newBuilder() + .setProgress( + Progress.newBuilder() + .setAtResponseStart(progressAtResponseStart) + .setAtResponseEnd(progressAtResponseEnd))) + .build(); + } + + private ReadRowsResponse createResponseArrow( + org.apache.arrow.vector.types.pojo.Schema arrowSchema, + List name, + List number, + double progressAtResponseStart, + double progressAtResponseEnd) { + ArrowRecordBatch serializedRecord; + try (VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(arrowSchema, allocator)) { + schemaRoot.allocateNew(); + schemaRoot.setRowCount(name.size()); + VarCharVector strVector = (VarCharVector) schemaRoot.getFieldVectors().get(0); + BigIntVector bigIntVector = (BigIntVector) schemaRoot.getFieldVectors().get(1); + for (int i = 0; i < name.size(); i++) { + bigIntVector.set(i, number.get(i)); + strVector.set(i, new Text(name.get(i))); + } + + VectorUnloader unLoader = new VectorUnloader(schemaRoot); + try (org.apache.arrow.vector.ipc.message.ArrowRecordBatch records = + unLoader.getRecordBatch()) { + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + MessageSerializer.serialize(new WriteChannel(Channels.newChannel(os)), records); + serializedRecord = + ArrowRecordBatch.newBuilder() + .setRowCount(records.getLength()) + .setSerializedRecordBatch(ByteString.copyFrom(os.toByteArray())) + .build(); + } catch (IOException e) { + throw new RuntimeException("Error writing to byte array output stream", e); + } + } + } + + return ReadRowsResponse.newBuilder() + .setArrowRecordBatch(serializedRecord) + .setRowCount(name.size()) + .setStats( + StreamStats.newBuilder() + .setProgress( + Progress.newBuilder() + .setAtResponseStart(progressAtResponseStart) + .setAtResponseEnd(progressAtResponseEnd))) + .build(); + } + + @Test + public void testStreamSourceEstimatedSizeBytes() throws Exception { + List streamBundle = Lists.newArrayList(ReadStream.getDefaultInstance()); + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + ReadSession.getDefaultInstance(), + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices(), + 1L); + + assertEquals(0, streamSource.getEstimatedSizeBytes(options)); + } + + @Test + public void testStreamSourceSplit() throws Exception { + List streamBundle = Lists.newArrayList(ReadStream.getDefaultInstance()); + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + ReadSession.getDefaultInstance(), + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices(), + 1L); + + assertThat(streamSource.split(0, options), containsInAnyOrder(streamSource)); + } + + @Test + public void testReadFromStreamSource() throws Exception { + ReadSession readSession = + ReadSession.newBuilder() + 
.setName("readSession") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .build(); + + ReadRowsRequest expectedRequestOne = + ReadRowsRequest.newBuilder().setReadStream("readStream1").setOffset(0).build(); + ReadRowsRequest expectedRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("readStream2").setOffset(0).build(); + + List records = + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), + createRecord("B", 2, AVRO_SCHEMA), + createRecord("C", 3, AVRO_SCHEMA), + createRecord("D", 4, AVRO_SCHEMA), + createRecord("E", 5, AVRO_SCHEMA), + createRecord("F", 6, AVRO_SCHEMA)); + + List responsesOne = + Lists.newArrayList( + createResponse(AVRO_SCHEMA, records.subList(0, 2), 0.0, 0.50), + createResponse(AVRO_SCHEMA, records.subList(2, 3), 0.5, 0.75)); + List responsesTwo = + Lists.newArrayList( + createResponse(AVRO_SCHEMA, records.subList(3, 5), 0.0, 0.50), + createResponse(AVRO_SCHEMA, records.subList(5, 6), 0.5, 0.75)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows(expectedRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(responsesOne)); + when(fakeStorageClient.readRows(expectedRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(responsesTwo)); + + List streamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build()); + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + List rows = new ArrayList<>(); + BigQueryStorageStreamBundleReader reader = streamSource.createReader(options); + for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) { + rows.add(reader.getCurrent()); + } + + System.out.println("Rows: " + rows); + + assertEquals(6, rows.size()); + } + + private static final double DELTA = 1e-6; + + @Test + public void testFractionConsumedWithOneStreamInBundle() throws Exception { + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSession") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .build(); + + ReadRowsRequest expectedRequest = + ReadRowsRequest.newBuilder().setReadStream("readStream").build(); + + List records = + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), + createRecord("B", 2, AVRO_SCHEMA), + createRecord("C", 3, AVRO_SCHEMA), + createRecord("D", 4, AVRO_SCHEMA), + createRecord("E", 5, AVRO_SCHEMA), + createRecord("F", 6, AVRO_SCHEMA), + createRecord("G", 7, AVRO_SCHEMA)); + + List responses = + Lists.newArrayList( + createResponse(AVRO_SCHEMA, records.subList(0, 2), 0.0, 0.25), + // Some responses may contain zero results, so we must ensure that we are resilient + // to such responses.
+ createResponse(AVRO_SCHEMA, Lists.newArrayList(), 0.25, 0.25), + createResponse(AVRO_SCHEMA, records.subList(2, 4), 0.3, 0.5), + createResponse(AVRO_SCHEMA, records.subList(4, 7), 0.7, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows(expectedRequest, "")) + .thenReturn(new FakeBigQueryServerStream<>(responses)); + + List streamBundle = + Lists.newArrayList(ReadStream.newBuilder().setName("readStream").build()); + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + BoundedReader reader = streamSource.createReader(options); + + // Before call to BoundedReader#start, fraction consumed must be zero. + assertEquals(0.0, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.start()); // Reads A. + assertEquals(0.125, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads B. + assertEquals(0.25, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads C. + assertEquals(0.4, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads D. + assertEquals(0.5, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads E. + assertEquals(0.8, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads F. + assertEquals(0.9, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads G. + assertEquals(1.0, reader.getFractionConsumed(), DELTA); + + assertFalse(reader.advance()); // Reaches the end. + + // We are done with the stream, so we should report 100% consumption. + assertEquals(Double.valueOf(1.0), reader.getFractionConsumed()); + } + + @Test + public void testFractionConsumedWithMultipleStreamsInBundle() throws Exception { + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSession") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .build(); + + ReadRowsRequest expectedRequestOne = + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(); + ReadRowsRequest expectedRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(); + + List records = + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), + createRecord("B", 2, AVRO_SCHEMA), + createRecord("C", 3, AVRO_SCHEMA), + createRecord("D", 4, AVRO_SCHEMA), + createRecord("E", 5, AVRO_SCHEMA), + createRecord("F", 6, AVRO_SCHEMA), + createRecord("G", 7, AVRO_SCHEMA)); + + List responsesOne = + Lists.newArrayList( + createResponse(AVRO_SCHEMA, records.subList(0, 2), 0.0, 0.5), + // Some responses may contain zero results, so we must ensure that we are resilient + // to such responses. 
+ createResponse(AVRO_SCHEMA, Lists.newArrayList(), 0.5, 0.5), + createResponse(AVRO_SCHEMA, records.subList(2, 4), 0.5, 1.0)); + + List responsesTwo = + Lists.newArrayList(createResponse(AVRO_SCHEMA, records.subList(4, 7), 0.0, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows(expectedRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(responsesOne)); + when(fakeStorageClient.readRows(expectedRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(responsesTwo)); + + List streamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build()); + + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + BoundedReader reader = streamSource.createReader(options); + + // Before call to BoundedReader#start, fraction consumed must be zero. + assertEquals(0.0, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.start()); // Reads A. + assertEquals(0.125, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads B. + assertEquals(0.25, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads C. + assertEquals(0.375, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads D. + assertEquals(0.5, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads E. + assertEquals(0.6666666666666666, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads F. + assertEquals(0.8333333333333333, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads G. + assertEquals(1.0, reader.getFractionConsumed(), DELTA); + + assertFalse(reader.advance()); // Reaches the end. + + // We are done with the streams, so we should report 100% consumption. + assertEquals(Double.valueOf(1.0), reader.getFractionConsumed()); + } + + @Test + public void testStreamSourceSplitAtFractionNoOpWithOneStreamInBundle() throws Exception { + List responses = + Lists.newArrayList( + createResponse( + AVRO_SCHEMA, + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), createRecord("B", 2, AVRO_SCHEMA)), + 0.0, + 0.25), + createResponse( + AVRO_SCHEMA, Lists.newArrayList(createRecord("C", 3, AVRO_SCHEMA)), 0.25, 0.50), + createResponse( + AVRO_SCHEMA, + Lists.newArrayList( + createRecord("D", 4, AVRO_SCHEMA), createRecord("E", 5, AVRO_SCHEMA)), + 0.50, + 0.75)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("parentStream").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses)); + + List parentStreamBundle = + Lists.newArrayList(ReadStream.newBuilder().setName("parentStream").build()); + BigQueryStorageStreamBundleSource streamBundleSource = + BigQueryStorageStreamBundleSource.create( + ReadSession.newBuilder() + .setName("readSession") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .build(), + parentStreamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + // Read a few records from the parent stream and ensure that records are returned in the + // prescribed order. 
+ BoundedReader primary = streamBundleSource.createReader(options);
+ assertTrue(primary.start());
+ assertEquals("A", primary.getCurrent().get("name"));
+ assertTrue(primary.advance());
+ assertEquals("B", primary.getCurrent().get("name"));
+
+ // Now split the stream. Since we do NOT split below the granularity of a single stream,
+ // this will be a No-Op and the primary source should be read to completion.
+ BoundedSource secondary = primary.splitAtFraction(0.5);
+ assertNull(secondary);
+
+ assertTrue(primary.advance());
+ assertEquals("C", primary.getCurrent().get("name"));
+ assertTrue(primary.advance());
+ assertEquals("D", primary.getCurrent().get("name"));
+ assertTrue(primary.advance());
+ assertEquals("E", primary.getCurrent().get("name"));
+ assertFalse(primary.advance());
+ }
+
+ @Test
+ public void testStreamSourceSplitAtFractionWithMultipleStreamsInBundle() throws Exception {
+ List responses =
+ Lists.newArrayList(
+ createResponse(
+ AVRO_SCHEMA,
+ Lists.newArrayList(
+ createRecord("A", 1, AVRO_SCHEMA), createRecord("B", 2, AVRO_SCHEMA)),
+ 0.0,
+ 0.6),
+ createResponse(
+ AVRO_SCHEMA, Lists.newArrayList(createRecord("C", 3, AVRO_SCHEMA)), 0.6, 1.0),
+ createResponse(
+ AVRO_SCHEMA,
+ Lists.newArrayList(
+ createRecord("D", 4, AVRO_SCHEMA),
+ createRecord("E", 5, AVRO_SCHEMA),
+ createRecord("F", 6, AVRO_SCHEMA)),
+ 0.0,
+ 1.0),
+ createResponse(
+ AVRO_SCHEMA, Lists.newArrayList(createRecord("G", 7, AVRO_SCHEMA)), 0.0, 1.0));
+
+ StorageClient fakeStorageClient = mock(StorageClient.class);
+ when(fakeStorageClient.readRows(
+ ReadRowsRequest.newBuilder().setReadStream("readStream1").build(), ""))
+ .thenReturn(new FakeBigQueryServerStream<>(responses.subList(0, 2)));
+ when(fakeStorageClient.readRows(
+ ReadRowsRequest.newBuilder().setReadStream("readStream2").build(), ""))
+ .thenReturn(new FakeBigQueryServerStream<>(responses.subList(2, 3)));
+ when(fakeStorageClient.readRows(
+ ReadRowsRequest.newBuilder().setReadStream("readStream3").build(), ""))
+ .thenReturn(new FakeBigQueryServerStream<>(responses.subList(3, 4)));
+
+ List primaryStreamBundle =
+ Lists.newArrayList(
+ ReadStream.newBuilder().setName("readStream1").build(),
+ ReadStream.newBuilder().setName("readStream2").build(),
+ ReadStream.newBuilder().setName("readStream3").build());
+
+ BigQueryStorageStreamBundleSource primarySource =
+ BigQueryStorageStreamBundleSource.create(
+ ReadSession.newBuilder()
+ .setName("readSession")
+ .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING))
+ .build(),
+ primaryStreamBundle,
+ TABLE_SCHEMA,
+ new TableRowParser(),
+ TableRowJsonCoder.of(),
+ new FakeBigQueryServices().withStorageClient(fakeStorageClient),
+ 1L);
+
+ // Read a few records from the primary Source and ensure that records are returned in the
+ // prescribed order.
+ BoundedReader primary = primarySource.createReader(options);
+
+ assertTrue(primary.start());
+
+ // Attempt to split at a sub-Stream level, which is NOT supported by the
+ // `BigQueryStorageStreamBundleSource`. In other words, since there are exactly 3 Streams in
+ // the Source, a split will only occur for fraction > 0.33.
+ BoundedSource secondarySource = primary.splitAtFraction(0.05); + assertNull(secondarySource); + + assertEquals("A", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("B", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("C", primary.getCurrent().get("name")); + + // Now split the primary Source, and ensure that the returned source points to a non-null + // StreamBundle containing Streams 2 & 3. + secondarySource = primary.splitAtFraction(0.5); + assertNotNull(secondarySource); + BoundedReader secondary = secondarySource.createReader(options); + + // Since the last two streams were split out the Primary source has been exhausted. + assertFalse(primary.advance()); + + assertTrue(secondary.start()); + assertEquals("D", secondary.getCurrent().get("name")); + assertTrue(secondary.advance()); + assertEquals("E", secondary.getCurrent().get("name")); + assertTrue(secondary.advance()); + assertEquals("F", secondary.getCurrent().get("name")); + assertTrue((secondary.advance())); + + // Since we have already started reading from the last Stream in the StreamBundle, splitting + // is now a no-op. + BoundedSource tertiarySource = secondary.splitAtFraction(0.55); + assertNull(tertiarySource); + + assertEquals("G", secondary.getCurrent().get("name")); + assertFalse((secondary.advance())); + } + + @Test + public void testStreamSourceSplitAtFractionRepeatedWithMultipleStreamInBundle() throws Exception { + List responses = + Lists.newArrayList( + createResponse( + AVRO_SCHEMA, + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), createRecord("B", 2, AVRO_SCHEMA)), + 0.0, + 0.6), + createResponse( + AVRO_SCHEMA, Lists.newArrayList(createRecord("C", 3, AVRO_SCHEMA)), 0.6, 1.0), + createResponse( + AVRO_SCHEMA, + Lists.newArrayList( + createRecord("D", 4, AVRO_SCHEMA), + createRecord("E", 5, AVRO_SCHEMA), + createRecord("F", 6, AVRO_SCHEMA)), + 0.0, + 1.0), + createResponse( + AVRO_SCHEMA, Lists.newArrayList(createRecord("G", 7, AVRO_SCHEMA)), 0.0, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(0, 2))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(2, 3))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream3").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(3, 4))); + + List primaryStreamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build(), + ReadStream.newBuilder().setName("readStream3").build()); + + BigQueryStorageStreamBundleSource primarySource = + BigQueryStorageStreamBundleSource.create( + ReadSession.newBuilder() + .setName("readSession") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .build(), + primaryStreamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + // Read a few records from the primary Source and ensure that records are returned in the + // prescribed order. 
+ BoundedReader primary = primarySource.createReader(options);
+
+ assertTrue(primary.start());
+ assertEquals("A", primary.getCurrent().get("name"));
+ assertTrue(primary.advance());
+ assertEquals("B", primary.getCurrent().get("name"));
+ assertTrue(primary.advance());
+ assertEquals("C", primary.getCurrent().get("name"));
+
+ // Now split the primary Source, and ensure that the returned source points to a non-null
+ // StreamBundle containing ONLY Stream 3. Since there are exactly 3 Streams in the Source,
+ // a split will only occur for fraction > 0.33.
+ BoundedSource secondarySource = primary.splitAtFraction(0.7);
+ assertNotNull(secondarySource);
+ BoundedReader secondary = secondarySource.createReader(options);
+ assertTrue(secondary.start());
+ assertEquals("G", secondary.getCurrent().get("name"));
+ assertFalse((secondary.advance()));
+
+ // A second splitAtFraction() call on the primary source. The resulting source should
+ // contain a StreamBundle containing ONLY Stream 2. Since there are 2 Streams in the Source,
+ // a split will only occur for fraction > 0.50.
+ BoundedSource tertiarySource = primary.splitAtFraction(0.55);
+ assertNotNull(tertiarySource);
+ BoundedReader tertiary = tertiarySource.createReader(options);
+ assertTrue(tertiary.start());
+ assertEquals("D", tertiary.getCurrent().get("name"));
+ assertTrue(tertiary.advance());
+ assertEquals("E", tertiary.getCurrent().get("name"));
+ assertTrue(tertiary.advance());
+ assertEquals("F", tertiary.getCurrent().get("name"));
+ assertFalse(tertiary.advance());
+
+ // A third attempt to split the primary source. This will be ignored since the primary source
+ // now contains only a single stream and `BigQueryStorageStreamBundleSource` does NOT support
+ // sub-stream splitting.
+ tertiarySource = primary.splitAtFraction(0.9);
+ assertNull(tertiarySource);
+
+ // All the rows in the primary Source have been read.
+ assertFalse(primary.advance()); + } + + @Test + public void testStreamSourceSplitAtFractionFailsWhenParentIsPastSplitPoint() throws Exception { + List responses = + Lists.newArrayList( + createResponse( + AVRO_SCHEMA, + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), createRecord("B", 2, AVRO_SCHEMA)), + 0.0, + 0.66), + createResponse( + AVRO_SCHEMA, Lists.newArrayList(createRecord("C", 3, AVRO_SCHEMA)), 0.66, 1.0), + createResponse( + AVRO_SCHEMA, + Lists.newArrayList( + createRecord("D", 4, AVRO_SCHEMA), createRecord("E", 5, AVRO_SCHEMA)), + 0.0, + 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(0, 2))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(2, 3))); + + List parentStreamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build()); + + BigQueryStorageStreamBundleSource streamBundleSource = + BigQueryStorageStreamBundleSource.create( + ReadSession.newBuilder() + .setName("readSession") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .build(), + parentStreamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + // Read a few records from the parent bundle and ensure the records are returned in + // the prescribed order. + BoundedReader primary = streamBundleSource.createReader(options); + assertTrue(primary.start()); + assertEquals("A", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("B", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("C", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("D", primary.getCurrent().get("name")); + + // We attempt to split the StreamBundle after starting to read the contents of the second + // stream. 
+ BoundedSource secondarySource = primary.splitAtFraction(0.5); + assertNull(secondarySource); + + assertTrue(primary.advance()); + assertEquals("E", primary.getCurrent().get("name")); + assertFalse(primary.advance()); + } + + private static final class ParseKeyValue + implements SerializableFunction> { + + @Override + public KV apply(SchemaAndRecord input) { + return KV.of( + input.getRecord().get("name").toString(), (Long) input.getRecord().get("number")); + } + } + + @Test + public void testReadFromBigQueryIO() throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + Table table = new Table().setTableReference(tableRef).setNumBytes(10L).setSchema(TABLE_SCHEMA); + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedCreateReadSessionRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table") + .setDataFormat(DataFormat.AVRO)) + .setMaxStreamCount(0) + .build(); + + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSessionName") + .setAvroSchema(AvroSchema.newBuilder().setSchema(AVRO_SCHEMA_STRING)) + .addStreams(ReadStream.newBuilder().setName("streamName1")) + .addStreams(ReadStream.newBuilder().setName("streamName2")) + .setDataFormat(DataFormat.AVRO) + .setEstimatedTotalBytesScanned(10L) + .build(); + + ReadRowsRequest expectedReadRowsRequestOne = + ReadRowsRequest.newBuilder().setReadStream("streamName1").build(); + ReadRowsRequest expectedReadRowsRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("streamName2").build(); + + List records = + Lists.newArrayList( + createRecord("A", 1, AVRO_SCHEMA), + createRecord("B", 2, AVRO_SCHEMA), + createRecord("C", 3, AVRO_SCHEMA), + createRecord("D", 4, AVRO_SCHEMA), + createRecord("E", 5, AVRO_SCHEMA), + createRecord("F", 6, AVRO_SCHEMA), + createRecord("G", 7, AVRO_SCHEMA)); + + List readRowsResponsesOne = + Lists.newArrayList( + createResponse(AVRO_SCHEMA, records.subList(0, 2), 0.0, 0.50), + createResponse(AVRO_SCHEMA, records.subList(2, 4), 0.5, 1.0)); + List readRowsResponsesTwo = + Lists.newArrayList( + createResponse(AVRO_SCHEMA, records.subList(4, 5), 0.0, 0.33), + createResponse(AVRO_SCHEMA, records.subList(5, 7), 0.33, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class, withSettings().serializable()); + when(fakeStorageClient.createReadSession(expectedCreateReadSessionRequest)) + .thenReturn(readSession); + when(fakeStorageClient.readRows(expectedReadRowsRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesOne)); + when(fakeStorageClient.readRows(expectedReadRowsRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesTwo)); + + PCollection> output = + p.apply( + BigQueryIO.read(new ParseKeyValue()) + .from("foo.com:project:dataset.table") + .withMethod(Method.DIRECT_READ) + .withFormat(DataFormat.AVRO) + .withTestServices( + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient))); + + PAssert.that(output) + .containsInAnyOrder( + ImmutableList.of( + KV.of("A", 1L), + KV.of("B", 2L), + KV.of("C", 3L), + KV.of("D", 4L), + KV.of("E", 5L), + KV.of("F", 6L), + KV.of("G", 7L))); + + p.run(); + } + + @Test + public void testReadFromBigQueryIOWithTrimmedSchema() throws Exception { + 
fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + Table table = new Table().setTableReference(tableRef).setNumBytes(10L).setSchema(TABLE_SCHEMA); + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedCreateReadSessionRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table") + .setReadOptions( + ReadSession.TableReadOptions.newBuilder().addSelectedFields("name")) + .setDataFormat(DataFormat.AVRO)) + .setMaxStreamCount(0) + .build(); + + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSessionName") + .setAvroSchema(AvroSchema.newBuilder().setSchema(TRIMMED_AVRO_SCHEMA_STRING)) + .addStreams(ReadStream.newBuilder().setName("streamName1")) + .addStreams(ReadStream.newBuilder().setName("streamName2")) + .setDataFormat(DataFormat.AVRO) + .build(); + + ReadRowsRequest expectedReadRowsRequestOne = + ReadRowsRequest.newBuilder().setReadStream("streamName1").build(); + ReadRowsRequest expectedReadRowsRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("streamName2").build(); + + List records = + Lists.newArrayList( + createRecord("A", TRIMMED_AVRO_SCHEMA), + createRecord("B", TRIMMED_AVRO_SCHEMA), + createRecord("C", TRIMMED_AVRO_SCHEMA), + createRecord("D", TRIMMED_AVRO_SCHEMA), + createRecord("E", TRIMMED_AVRO_SCHEMA), + createRecord("F", TRIMMED_AVRO_SCHEMA), + createRecord("G", TRIMMED_AVRO_SCHEMA)); + + List readRowsResponsesOne = + Lists.newArrayList( + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(0, 2), 0.0, 0.50), + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(2, 4), 0.5, 0.75)); + List readRowsResponsesTwo = + Lists.newArrayList( + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(4, 5), 0.0, 0.33), + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(5, 7), 0.33, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class, withSettings().serializable()); + when(fakeStorageClient.createReadSession(expectedCreateReadSessionRequest)) + .thenReturn(readSession); + when(fakeStorageClient.readRows(expectedReadRowsRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesOne)); + when(fakeStorageClient.readRows(expectedReadRowsRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesTwo)); + + PCollection output = + p.apply( + BigQueryIO.readTableRows() + .from("foo.com:project:dataset.table") + .withMethod(Method.DIRECT_READ) + .withSelectedFields(Lists.newArrayList("name")) + .withFormat(DataFormat.AVRO) + .withTestServices( + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient))); + + PAssert.that(output) + .containsInAnyOrder( + ImmutableList.of( + new TableRow().set("name", "A"), + new TableRow().set("name", "B"), + new TableRow().set("name", "C"), + new TableRow().set("name", "D"), + new TableRow().set("name", "E"), + new TableRow().set("name", "F"), + new TableRow().set("name", "G"))); + + p.run(); + } + + @Test + public void testReadFromBigQueryIOWithBeamSchema() throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + Table table = new Table().setTableReference(tableRef).setNumBytes(10L).setSchema(TABLE_SCHEMA); + 
fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedCreateReadSessionRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table") + .setReadOptions( + ReadSession.TableReadOptions.newBuilder().addSelectedFields("name")) + .setDataFormat(DataFormat.AVRO)) + .setMaxStreamCount(0) + .build(); + + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSessionName") + .setAvroSchema(AvroSchema.newBuilder().setSchema(TRIMMED_AVRO_SCHEMA_STRING)) + .addStreams(ReadStream.newBuilder().setName("streamName1")) + .addStreams(ReadStream.newBuilder().setName("streamName2")) + .setDataFormat(DataFormat.AVRO) + .build(); + + ReadRowsRequest expectedReadRowsRequestOne = + ReadRowsRequest.newBuilder().setReadStream("streamName1").build(); + ReadRowsRequest expectedReadRowsRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("streamName2").build(); + + List records = + Lists.newArrayList( + createRecord("A", TRIMMED_AVRO_SCHEMA), + createRecord("B", TRIMMED_AVRO_SCHEMA), + createRecord("C", TRIMMED_AVRO_SCHEMA), + createRecord("D", TRIMMED_AVRO_SCHEMA), + createRecord("E", TRIMMED_AVRO_SCHEMA), + createRecord("F", TRIMMED_AVRO_SCHEMA), + createRecord("G", TRIMMED_AVRO_SCHEMA)); + + List readRowsResponsesOne = + Lists.newArrayList( + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(0, 2), 0.0, 0.50), + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(2, 4), 0.5, 0.75)); + List readRowsResponsesTwo = + Lists.newArrayList( + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(4, 5), 0.0, 0.33), + createResponse(TRIMMED_AVRO_SCHEMA, records.subList(5, 7), 0.33, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class, withSettings().serializable()); + when(fakeStorageClient.createReadSession(expectedCreateReadSessionRequest)) + .thenReturn(readSession); + when(fakeStorageClient.readRows(expectedReadRowsRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesOne)); + when(fakeStorageClient.readRows(expectedReadRowsRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesTwo)); + + PCollection output = + p.apply( + BigQueryIO.readTableRowsWithSchema() + .from("foo.com:project:dataset.table") + .withMethod(Method.DIRECT_READ) + .withSelectedFields(Lists.newArrayList("name")) + .withFormat(DataFormat.AVRO) + .withTestServices( + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient))) + .apply(Convert.toRows()); + + org.apache.beam.sdk.schemas.Schema beamSchema = + org.apache.beam.sdk.schemas.Schema.of( + org.apache.beam.sdk.schemas.Schema.Field.of( + "name", org.apache.beam.sdk.schemas.Schema.FieldType.STRING)); + PAssert.that(output) + .containsInAnyOrder( + ImmutableList.of( + Row.withSchema(beamSchema).addValue("A").build(), + Row.withSchema(beamSchema).addValue("B").build(), + Row.withSchema(beamSchema).addValue("C").build(), + Row.withSchema(beamSchema).addValue("D").build(), + Row.withSchema(beamSchema).addValue("E").build(), + Row.withSchema(beamSchema).addValue("F").build(), + Row.withSchema(beamSchema).addValue("G").build())); + + p.run(); + } + + @Test + public void testReadFromBigQueryIOArrow() throws Exception { + fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null); + TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table"); + Table table = new 
Table().setTableReference(tableRef).setNumBytes(10L).setSchema(TABLE_SCHEMA); + fakeDatasetService.createTable(table); + + CreateReadSessionRequest expectedCreateReadSessionRequest = + CreateReadSessionRequest.newBuilder() + .setParent("projects/project-id") + .setReadSession( + ReadSession.newBuilder() + .setTable("projects/foo.com:project/datasets/dataset/tables/table") + .setDataFormat(DataFormat.ARROW)) + .setMaxStreamCount(0) + .build(); + + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSessionName") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .addStreams(ReadStream.newBuilder().setName("streamName1")) + .addStreams(ReadStream.newBuilder().setName("streamName2")) + .setDataFormat(DataFormat.ARROW) + .build(); + + ReadRowsRequest expectedReadRowsRequestOne = + ReadRowsRequest.newBuilder().setReadStream("streamName1").build(); + ReadRowsRequest expectedReadRowsRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("streamName2").build(); + + List names = Arrays.asList("A", "B", "C", "D", "E", "F", "G"); + List values = Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L); + List readRowsResponsesOne = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.50), + createResponseArrow( + ARROW_SCHEMA, names.subList(2, 4), values.subList(2, 4), 0.5, 0.75)); + List readRowsResponsesTwo = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(4, 5), values.subList(4, 5), 0.0, 0.33), + createResponseArrow( + ARROW_SCHEMA, names.subList(5, 6), values.subList(5, 6), 0.33, 0.66), + createResponseArrow( + ARROW_SCHEMA, names.subList(6, 7), values.subList(6, 7), 0.66, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class, withSettings().serializable()); + when(fakeStorageClient.createReadSession(expectedCreateReadSessionRequest)) + .thenReturn(readSession); + when(fakeStorageClient.readRows(expectedReadRowsRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesOne)); + when(fakeStorageClient.readRows(expectedReadRowsRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(readRowsResponsesTwo)); + + PCollection> output = + p.apply( + BigQueryIO.read(new ParseKeyValue()) + .from("foo.com:project:dataset.table") + .withMethod(Method.DIRECT_READ) + .withFormat(DataFormat.ARROW) + .withTestServices( + new FakeBigQueryServices() + .withDatasetService(fakeDatasetService) + .withStorageClient(fakeStorageClient))); + + PAssert.that(output) + .containsInAnyOrder( + ImmutableList.of( + KV.of("A", 1L), + KV.of("B", 2L), + KV.of("C", 3L), + KV.of("D", 4L), + KV.of("E", 5L), + KV.of("F", 6L), + KV.of("G", 7L))); + + p.run(); + } + + @Test + public void testReadFromStreamSourceArrow() throws Exception { + + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSession") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .setDataFormat(DataFormat.ARROW) + .build(); + + ReadRowsRequest expectedRequest = + ReadRowsRequest.newBuilder().setReadStream("readStream").build(); + + List names = Arrays.asList("A", "B", "C"); + List values = Arrays.asList(1L, 2L, 3L); + List responses = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.50), + createResponseArrow( + ARROW_SCHEMA, names.subList(2, 3), values.subList(2, 3), 0.5, 0.75)); + + StorageClient fakeStorageClient = 
mock(StorageClient.class); + when(fakeStorageClient.readRows(expectedRequest, "")) + .thenReturn(new FakeBigQueryServerStream<>(responses)); + + List streamBundle = + Lists.newArrayList(ReadStream.newBuilder().setName("readStream").build()); + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + List rows = new ArrayList<>(); + BoundedReader reader = streamSource.createReader(options); + for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) { + rows.add(reader.getCurrent()); + } + + System.out.println("Rows: " + rows); + + assertEquals(3, rows.size()); + } + + @Test + public void testFractionConsumedWithArrowAndOneStreamInBundle() throws Exception { + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSession") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .setDataFormat(DataFormat.ARROW) + .build(); + + ReadRowsRequest expectedRequest = + ReadRowsRequest.newBuilder().setReadStream("readStream").build(); + + List names = Arrays.asList("A", "B", "C", "D", "E", "F", "G"); + List values = Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L); + List responses = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.25), + createResponseArrow( + ARROW_SCHEMA, Lists.newArrayList(), Lists.newArrayList(), 0.25, 0.25), + createResponseArrow(ARROW_SCHEMA, names.subList(2, 4), values.subList(2, 4), 0.3, 0.5), + createResponseArrow(ARROW_SCHEMA, names.subList(4, 7), values.subList(4, 7), 0.7, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows(expectedRequest, "")) + .thenReturn(new FakeBigQueryServerStream<>(responses)); + + List streamBundle = + Lists.newArrayList(ReadStream.newBuilder().setName("readStream").build()); + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + BoundedReader reader = streamSource.createReader(options); + + // Before call to BoundedReader#start, fraction consumed must be zero. + assertEquals(0.0, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.start()); // Reads A. + assertEquals(0.125, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads B. + assertEquals(0.25, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads C. + assertEquals(0.4, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads D. + assertEquals(0.5, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads E. + assertEquals(0.8, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads F. + assertEquals(0.9, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads G. + assertEquals(1.0, reader.getFractionConsumed(), DELTA); + + assertFalse(reader.advance()); // Reaches the end. + + // We are done with the stream, so we should report 100% consumption. 
+ assertEquals(Double.valueOf(1.0), reader.getFractionConsumed()); + } + + @Test + public void testFractionConsumedWithArrowAndMultipleStreamsInBundle() throws Exception { + ReadSession readSession = + ReadSession.newBuilder() + .setName("readSession") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .setDataFormat(DataFormat.ARROW) + .build(); + + ReadRowsRequest expectedRequestOne = + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(); + ReadRowsRequest expectedRequestTwo = + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(); + + List names = Arrays.asList("A", "B", "C", "D", "E", "F", "G"); + List values = Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L); + List responsesOne = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.5), + createResponseArrow(ARROW_SCHEMA, Lists.newArrayList(), Lists.newArrayList(), 0.5, 0.5), + createResponseArrow(ARROW_SCHEMA, names.subList(2, 4), values.subList(2, 4), 0.5, 1.0)); + + List responsesTwo = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(4, 7), values.subList(4, 7), 0.0, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows(expectedRequestOne, "")) + .thenReturn(new FakeBigQueryServerStream<>(responsesOne)); + when(fakeStorageClient.readRows(expectedRequestTwo, "")) + .thenReturn(new FakeBigQueryServerStream<>(responsesTwo)); + + List streamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build()); + + BigQueryStorageStreamBundleSource streamSource = + BigQueryStorageStreamBundleSource.create( + readSession, + streamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + BoundedReader reader = streamSource.createReader(options); + + // Before call to BoundedReader#start, fraction consumed must be zero. + assertEquals(0.0, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.start()); // Reads A. + assertEquals(0.125, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads B. + assertEquals(0.25, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads C. + assertEquals(0.375, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads D. + assertEquals(0.5, reader.getFractionConsumed(), DELTA); + + assertTrue(reader.advance()); // Reads E. + assertEquals(0.6666666666666666, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads F. + assertEquals(0.8333333333333333, reader.getFractionConsumed(), DELTA); + assertTrue(reader.advance()); // Reads G. + assertEquals(1.0, reader.getFractionConsumed(), DELTA); + + assertFalse(reader.advance()); // Reaches the end. + + // We are done with the streams, so we should report 100% consumption. 
+ assertEquals(Double.valueOf(1.0), reader.getFractionConsumed()); + } + + @Test + public void testStreamSourceSplitAtFractionWithArrowAndMultipleStreamsInBundle() + throws Exception { + List names = Arrays.asList("A", "B", "C", "D", "E", "F", "G"); + List values = Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L); + List responses = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.6), + createResponseArrow(ARROW_SCHEMA, names.subList(2, 3), values.subList(2, 3), 0.6, 1.0), + createResponseArrow(ARROW_SCHEMA, names.subList(3, 6), values.subList(3, 6), 0.0, 1.0), + createResponseArrow(ARROW_SCHEMA, names.subList(6, 7), values.subList(6, 7), 0.0, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(0, 2))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(2, 3))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream3").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(3, 4))); + + List primaryStreamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build(), + ReadStream.newBuilder().setName("readStream3").build()); + + BigQueryStorageStreamBundleSource primarySource = + BigQueryStorageStreamBundleSource.create( + ReadSession.newBuilder() + .setName("readSession") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .setDataFormat(DataFormat.ARROW) + .build(), + primaryStreamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + // Read a few records from the primary bundle and ensure that records are returned in the + // prescribed order. + BoundedReader primary = primarySource.createReader(options); + assertTrue(primary.start()); + assertEquals("A", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("B", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + + // Now split the StreamBundle, and ensure that the returned source points to a non-null + // secondary StreamBundle. 
+ BoundedSource secondarySource = primary.splitAtFraction(0.35); + assertNotNull(secondarySource); + BoundedReader secondary = secondarySource.createReader(options); + + assertEquals("C", primary.getCurrent().get("name")); + assertFalse(primary.advance()); + + assertTrue(secondary.start()); + assertEquals("D", secondary.getCurrent().get("name")); + assertTrue(secondary.advance()); + assertEquals("E", secondary.getCurrent().get("name")); + assertTrue(secondary.advance()); + assertEquals("F", secondary.getCurrent().get("name")); + assertTrue((secondary.advance())); + assertEquals("G", secondary.getCurrent().get("name")); + assertFalse((secondary.advance())); + } + + @Test + public void testStreamSourceSplitAtFractionRepeatedWithArrowAndMultipleStreamsInBundle() + throws Exception { + List names = Arrays.asList("A", "B", "C", "D", "E", "F", "G"); + List values = Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L); + List responses = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.6), + createResponseArrow(ARROW_SCHEMA, names.subList(2, 3), values.subList(2, 3), 0.6, 1.0), + createResponseArrow(ARROW_SCHEMA, names.subList(3, 6), values.subList(3, 6), 0.0, 1.0), + createResponseArrow(ARROW_SCHEMA, names.subList(6, 7), values.subList(6, 7), 0.0, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(0, 2))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(2, 3))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream3").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(3, 4))); + + List primaryStreamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build(), + ReadStream.newBuilder().setName("readStream3").build()); + + BigQueryStorageStreamBundleSource primarySource = + BigQueryStorageStreamBundleSource.create( + ReadSession.newBuilder() + .setName("readSession") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .setDataFormat(DataFormat.ARROW) + .build(), + primaryStreamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + // Read a few records from the primary bundle and ensure that records are returned in the + // prescribed order. + BoundedReader primary = primarySource.createReader(options); + assertTrue(primary.start()); + assertEquals("A", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("B", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + + // Now split the StreamBundle, and ensure that the returned source points to a non-null + // secondary StreamBundle. Since there are 3 streams in this Bundle, splitting will only + // occur when fraction >= 0.33. 
+ BoundedSource secondarySource = primary.splitAtFraction(0.35); + assertNotNull(secondarySource); + BoundedReader secondary = secondarySource.createReader(options); + + assertEquals("C", primary.getCurrent().get("name")); + assertFalse(primary.advance()); + + assertTrue(secondary.start()); + assertEquals("D", secondary.getCurrent().get("name")); + assertTrue(secondary.advance()); + assertEquals("E", secondary.getCurrent().get("name")); + assertTrue(secondary.advance()); + + // Now split the StreamBundle again, and ensure that the returned source points to a non-null + // tertiary StreamBundle. Since there are 2 streams in this Bundle, splitting will only + // occur when fraction >= 0.5. + BoundedSource tertiarySource = secondary.splitAtFraction(0.5); + assertNotNull(tertiarySource); + BoundedReader tertiary = tertiarySource.createReader(options); + + assertEquals("F", secondary.getCurrent().get("name")); + assertFalse((secondary.advance())); + + assertTrue(tertiary.start()); + assertEquals("G", tertiary.getCurrent().get("name")); + assertFalse((tertiary.advance())); + } + + @Test + public void testStreamSourceSplitAtFractionFailsWhenParentIsPastSplitPointArrow() + throws Exception { + List names = Arrays.asList("A", "B", "C", "D", "E"); + List values = Arrays.asList(1L, 2L, 3L, 4L, 5L); + List responses = + Lists.newArrayList( + createResponseArrow(ARROW_SCHEMA, names.subList(0, 2), values.subList(0, 2), 0.0, 0.66), + createResponseArrow(ARROW_SCHEMA, names.subList(2, 3), values.subList(2, 3), 0.66, 1.0), + createResponseArrow(ARROW_SCHEMA, names.subList(3, 5), values.subList(3, 5), 0.0, 1.0)); + + StorageClient fakeStorageClient = mock(StorageClient.class); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream1").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(0, 2))); + when(fakeStorageClient.readRows( + ReadRowsRequest.newBuilder().setReadStream("readStream2").build(), "")) + .thenReturn(new FakeBigQueryServerStream<>(responses.subList(2, 3))); + + List parentStreamBundle = + Lists.newArrayList( + ReadStream.newBuilder().setName("readStream1").build(), + ReadStream.newBuilder().setName("readStream2").build()); + + BigQueryStorageStreamBundleSource streamBundleSource = + BigQueryStorageStreamBundleSource.create( + ReadSession.newBuilder() + .setName("readSession") + .setArrowSchema( + ArrowSchema.newBuilder() + .setSerializedSchema(serializeArrowSchema(ARROW_SCHEMA)) + .build()) + .setDataFormat(DataFormat.ARROW) + .build(), + parentStreamBundle, + TABLE_SCHEMA, + new TableRowParser(), + TableRowJsonCoder.of(), + new FakeBigQueryServices().withStorageClient(fakeStorageClient), + 1L); + + // Read a few records from the parent bundle and ensure the records are returned in + // the prescribed order. + BoundedReader primary = streamBundleSource.createReader(options); + assertTrue(primary.start()); + assertEquals("A", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("B", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("C", primary.getCurrent().get("name")); + assertTrue(primary.advance()); + assertEquals("D", primary.getCurrent().get("name")); + + // We attempt to split the StreamBundle after starting to read the contents of the second + // stream. 
+ BoundedSource secondarySource = primary.splitAtFraction(0.5); + assertNull(secondarySource); + + assertTrue(primary.advance()); + assertEquals("E", primary.getCurrent().get("name")); + assertFalse(primary.advance()); + } + + @Test + public void testActuateProjectionPushdown() { + org.apache.beam.sdk.schemas.Schema schema = + org.apache.beam.sdk.schemas.Schema.builder() + .addStringField("foo") + .addStringField("bar") + .build(); + TypedRead read = + BigQueryIO.read( + record -> + BigQueryUtils.toBeamRow( + record.getRecord(), schema, ConversionOptions.builder().build())) + .withMethod(Method.DIRECT_READ) + .withCoder(SchemaCoder.of(schema)); + + assertTrue(read.supportsProjectionPushdown()); + PTransform> pushdownT = + read.actuateProjectionPushdown( + ImmutableMap.of(new TupleTag<>("output"), FieldAccessDescriptor.withFieldNames("foo"))); + + TypedRead pushdownRead = (TypedRead) pushdownT; + assertEquals(Method.DIRECT_READ, pushdownRead.getMethod()); + assertThat(pushdownRead.getSelectedFields().get(), Matchers.containsInAnyOrder("foo")); + assertTrue(pushdownRead.getProjectionPushdownApplied()); + } + + @Test + public void testReadFromQueryDoesNotSupportProjectionPushdown() { + org.apache.beam.sdk.schemas.Schema schema = + org.apache.beam.sdk.schemas.Schema.builder() + .addStringField("foo") + .addStringField("bar") + .build(); + TypedRead read = + BigQueryIO.read( + record -> + BigQueryUtils.toBeamRow( + record.getRecord(), schema, ConversionOptions.builder().build())) + .fromQuery("SELECT bar FROM `dataset.table`") + .withMethod(Method.DIRECT_READ) + .withCoder(SchemaCoder.of(schema)); + + assertFalse(read.supportsProjectionPushdown()); + assertThrows( + IllegalArgumentException.class, + () -> + read.actuateProjectionPushdown( + ImmutableMap.of( + new TupleTag<>("output"), FieldAccessDescriptor.withFieldNames("foo")))); + } + + private static org.apache.arrow.vector.types.pojo.Field field( + String name, + boolean nullable, + ArrowType type, + org.apache.arrow.vector.types.pojo.Field... children) { + return new org.apache.arrow.vector.types.pojo.Field( + name, + new org.apache.arrow.vector.types.pojo.FieldType(nullable, type, null, null), + asList(children)); + } + + static org.apache.arrow.vector.types.pojo.Field field( + String name, ArrowType type, org.apache.arrow.vector.types.pojo.Field... 
children) { + return field(name, false, type, children); + } +} diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java index eacb95a9a683..cbf91d7c5637 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java @@ -48,12 +48,12 @@ import org.apache.avro.LogicalTypes; import org.apache.avro.generic.GenericData; import org.apache.avro.util.Utf8; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils.ConversionOptions.TruncateTimestamps; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.logicaltypes.EnumerationType; import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; import org.joda.time.DateTime; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProtoTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProtoTest.java index d143315ee59d..c0e970bab849 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProtoTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProtoTest.java @@ -94,6 +94,12 @@ public class TableRowToStorageApiProtoTest { .add(new TableFieldSchema().setType("TIMESTAMP").setName("timestampISOValue")) .add(new TableFieldSchema().setType("TIMESTAMP").setName("timestampValueLong")) .add(new TableFieldSchema().setType("TIMESTAMP").setName("timestampValueSpace")) + .add( + new TableFieldSchema().setType("TIMESTAMP").setName("timestampValueSpaceUtc")) + .add( + new TableFieldSchema() + .setType("TIMESTAMP") + .setName("timestampValueZoneRegion")) .add( new TableFieldSchema() .setType("TIMESTAMP") @@ -133,6 +139,12 @@ public class TableRowToStorageApiProtoTest { .add(new TableFieldSchema().setType("TIMESTAMP").setName("timestampISOValue")) .add(new TableFieldSchema().setType("TIMESTAMP").setName("timestampValueLong")) .add(new TableFieldSchema().setType("TIMESTAMP").setName("timestampValueSpace")) + .add( + new TableFieldSchema().setType("TIMESTAMP").setName("timestampValueSpaceUtc")) + .add( + new TableFieldSchema() + .setType("TIMESTAMP") + .setName("timestampValueZoneRegion")) .add( new TableFieldSchema() .setType("TIMESTAMP") @@ -295,25 +307,39 @@ public class TableRowToStorageApiProtoTest { .build()) .addField( FieldDescriptorProto.newBuilder() - .setName("timestampvaluespacemilli") + .setName("timestampvaluespaceutc") .setNumber(22) .setType(Type.TYPE_INT64) .setLabel(Label.LABEL_OPTIONAL) .build()) .addField( FieldDescriptorProto.newBuilder() - .setName("timestampvaluespacetrailingzero") + .setName("timestampvaluezoneregion") .setNumber(23) .setType(Type.TYPE_INT64) .setLabel(Label.LABEL_OPTIONAL) .build()) .addField( FieldDescriptorProto.newBuilder() - .setName("datetimevaluespace") + .setName("timestampvaluespacemilli") .setNumber(24) 
.setType(Type.TYPE_INT64) .setLabel(Label.LABEL_OPTIONAL) .build()) + .addField( + FieldDescriptorProto.newBuilder() + .setName("timestampvaluespacetrailingzero") + .setNumber(25) + .setType(Type.TYPE_INT64) + .setLabel(Label.LABEL_OPTIONAL) + .build()) + .addField( + FieldDescriptorProto.newBuilder() + .setName("datetimevaluespace") + .setNumber(26) + .setType(Type.TYPE_INT64) + .setLabel(Label.LABEL_OPTIONAL) + .build()) .build(); private static final DescriptorProto BASE_TABLE_SCHEMA_NO_F_PROTO = @@ -460,25 +486,39 @@ public class TableRowToStorageApiProtoTest { .build()) .addField( FieldDescriptorProto.newBuilder() - .setName("timestampvaluespacemilli") + .setName("timestampvaluespaceutc") .setNumber(21) .setType(Type.TYPE_INT64) .setLabel(Label.LABEL_OPTIONAL) .build()) .addField( FieldDescriptorProto.newBuilder() - .setName("timestampvaluespacetrailingzero") + .setName("timestampvaluezoneregion") .setNumber(22) .setType(Type.TYPE_INT64) .setLabel(Label.LABEL_OPTIONAL) .build()) .addField( FieldDescriptorProto.newBuilder() - .setName("datetimevaluespace") + .setName("timestampvaluespacemilli") .setNumber(23) .setType(Type.TYPE_INT64) .setLabel(Label.LABEL_OPTIONAL) .build()) + .addField( + FieldDescriptorProto.newBuilder() + .setName("timestampvaluespacetrailingzero") + .setNumber(24) + .setType(Type.TYPE_INT64) + .setLabel(Label.LABEL_OPTIONAL) + .build()) + .addField( + FieldDescriptorProto.newBuilder() + .setName("datetimevaluespace") + .setNumber(25) + .setType(Type.TYPE_INT64) + .setLabel(Label.LABEL_OPTIONAL) + .build()) .build(); private static final TableSchema NESTED_TABLE_SCHEMA = new TableSchema() @@ -621,6 +661,8 @@ public void testNestedFromTableSchema() { new TableCell().setV("1970-01-01T00:00:00.000+01:00"), new TableCell().setV("1234567"), new TableCell().setV("1970-01-01 00:00:00.000343"), + new TableCell().setV("1970-01-01 00:00:00.000343 UTC"), + new TableCell().setV("1970-01-01 00:00:00.123456 America/New_York"), new TableCell().setV("1970-01-01 00:00:00.123"), new TableCell().setV("1970-01-01 00:00:00.1230"), new TableCell().setV("2019-08-16 00:52:07.123456"))); @@ -650,6 +692,8 @@ public void testNestedFromTableSchema() { .set("timestampValueLong", "1234567") // UTC time for backwards compatibility .set("timestampValueSpace", "1970-01-01 00:00:00.000343") + .set("timestampValueSpaceUtc", "1970-01-01 00:00:00.000343 UTC") + .set("timestampValueZoneRegion", "1970-01-01 00:00:00.123456 America/New_York") .set("timestampValueSpaceMilli", "1970-01-01 00:00:00.123") .set("timestampValueSpaceTrailingZero", "1970-01-01 00:00:00.1230") .set("datetimeValueSpace", "2019-08-16 00:52:07.123456"); @@ -686,6 +730,8 @@ public void testNestedFromTableSchema() { .put("timestampisovalue", -3600000000L) .put("timestampvaluelong", 1234567000L) .put("timestampvaluespace", 343L) + .put("timestampvaluespaceutc", 343L) + .put("timestampvaluezoneregion", 18000123456L) .put("timestampvaluespacemilli", 123000L) .put("timestampvaluespacetrailingzero", 123000L) .put("datetimevaluespace", 142111881387172416L) @@ -722,6 +768,8 @@ public void testNestedFromTableSchema() { .put("timestampisovalue", -3600000000L) .put("timestampvaluelong", 1234567000L) .put("timestampvaluespace", 343L) + .put("timestampvaluespaceutc", 343L) + .put("timestampvaluezoneregion", 18000123456L) .put("timestampvaluespacemilli", 123000L) .put("timestampvaluespacetrailingzero", 123000L) .put("datetimevaluespace", 142111881387172416L) diff --git 
a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java index 09775f7e0768..c7b3ce764a7f 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIOTest.java @@ -48,10 +48,10 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.reflect.AvroSchema; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.protobuf.Proto3SchemaMessages.Primitive; import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; import org.apache.beam.sdk.extensions.protobuf.ProtoDomain; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubReadSchemaTransformProviderTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubReadSchemaTransformProviderTest.java index f91183aa050c..aaceda5342db 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubReadSchemaTransformProviderTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubReadSchemaTransformProviderTest.java @@ -37,9 +37,9 @@ import java.util.Objects; import java.util.UUID; import java.util.stream.Collectors; +import org.apache.beam.sdk.extensions.avro.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.schemas.AutoValueSchema; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer; import org.apache.beam.sdk.schemas.transforms.SchemaTransform; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubRowToMessageTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubRowToMessageTest.java index 029c8ef08a4a..2ff0084bc086 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubRowToMessageTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubRowToMessageTest.java @@ -42,6 +42,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.beam.sdk.extensions.avro.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.io.gcp.pubsub.PubsubRowToMessage.FieldMatcher; import org.apache.beam.sdk.io.gcp.pubsub.PubsubRowToMessage.PubsubRowToMessageDoFn; import org.apache.beam.sdk.io.gcp.pubsub.PubsubRowToMessage.SchemaReflection; @@ -52,7 +53,6 @@ import org.apache.beam.sdk.schemas.Schema.Field; import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.Schema.TypeName; -import org.apache.beam.sdk.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.JsonPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer; import org.apache.beam.sdk.testing.PAssert; diff --git 
a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaTransformMessageToRowFactoryTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaTransformMessageToRowFactoryTest.java index ad75e24ee9ad..709fc35e02ae 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaTransformMessageToRowFactoryTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaTransformMessageToRowFactoryTest.java @@ -28,9 +28,9 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import org.apache.beam.sdk.extensions.avro.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.FieldType; -import org.apache.beam.sdk.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.JsonPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer; import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializerProvider; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubWriteSchemaTransformProviderTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubWriteSchemaTransformProviderTest.java index b9c912ffea68..98939f7ddc68 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubWriteSchemaTransformProviderTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubWriteSchemaTransformProviderTest.java @@ -39,6 +39,7 @@ import java.nio.charset.StandardCharsets; import java.util.Map; import org.apache.avro.SchemaParseException; +import org.apache.beam.sdk.extensions.avro.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.SchemaPath; import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath; import org.apache.beam.sdk.io.gcp.pubsub.PubsubTestClient.PubsubTestClientFactory; @@ -49,7 +50,6 @@ import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.Field; import org.apache.beam.sdk.schemas.Schema.FieldType; -import org.apache.beam.sdk.schemas.io.payloads.AvroPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.JsonPayloadSerializerProvider; import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/encoder/TimestampEncodingTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/encoder/TimestampEncodingTest.java index 7afdb35a2070..aaa1657b1f28 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/encoder/TimestampEncodingTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/encoder/TimestampEncodingTest.java @@ -32,7 +32,7 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; import org.apache.avro.reflect.AvroEncode; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.junit.Before; import org.junit.Test; diff --git 
a/sdks/java/io/hadoop-format/build.gradle b/sdks/java/io/hadoop-format/build.gradle index 2a920de60fa5..8b938bdc27b6 100644 --- a/sdks/java/io/hadoop-format/build.gradle +++ b/sdks/java/io/hadoop-format/build.gradle @@ -69,6 +69,7 @@ dependencies { // on the classpath before the one provided by :sdks:java:core shadowTest. testImplementation "com.github.jbellis:jamm:0.3.0" testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration") testImplementation project(":sdks:java:io:jdbc") diff --git a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/Employee.java b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/Employee.java index fe2cda132520..58ff2005594e 100644 --- a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/Employee.java +++ b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/Employee.java @@ -18,8 +18,8 @@ package org.apache.beam.sdk.io.hadoop.format; import java.util.Objects; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.checkerframework.checker.nullness.qual.Nullable; /** diff --git a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOReadTest.java b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOReadTest.java index be3191dfeab9..da12e9d04142 100644 --- a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOReadTest.java +++ b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOReadTest.java @@ -32,11 +32,11 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.NullableCoder; import org.apache.beam.sdk.coders.RowCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.BoundedSource.BoundedReader; import org.apache.beam.sdk.io.hadoop.SerializableConfiguration; diff --git a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/TestRowDBWritable.java b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/TestRowDBWritable.java index 2d10bdbd269d..f8ef1e71d15a 100644 --- a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/TestRowDBWritable.java +++ b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/TestRowDBWritable.java @@ -23,8 +23,8 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.common.TestRow; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.lib.db.DBWritable; diff --git a/sdks/java/io/influxdb/build.gradle 
b/sdks/java/io/influxdb/build.gradle index f890f35a8b09..cca0d01cff05 100644 --- a/sdks/java/io/influxdb/build.gradle +++ b/sdks/java/io/influxdb/build.gradle @@ -34,5 +34,6 @@ dependencies { testImplementation library.java.powermock_mockito testImplementation library.java.mockito_core testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") } diff --git a/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/Model.java b/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/Model.java index f8eec6f2d029..957ce4837289 100644 --- a/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/Model.java +++ b/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/Model.java @@ -20,8 +20,8 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.influxdb.dto.Point; @DefaultCoder(AvroCoder.class) diff --git a/sdks/java/io/jdbc/build.gradle b/sdks/java/io/jdbc/build.gradle index 71f7ff363877..379b073eb786 100644 --- a/sdks/java/io/jdbc/build.gradle +++ b/sdks/java/io/jdbc/build.gradle @@ -37,6 +37,7 @@ dependencies { testImplementation "org.apache.derby:derbyclient:10.14.2.0" testImplementation "org.apache.derby:derbynet:10.14.2.0" testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration") testImplementation library.java.junit diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProvider.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProvider.java index 77ec4082f6f4..b5969e318099 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProvider.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProvider.java @@ -67,6 +67,10 @@ public Schema configurationSchema() { .addNullableField("fetchSize", FieldType.INT16) .addNullableField("outputParallelization", FieldType.BOOLEAN) .addNullableField("autosharding", FieldType.BOOLEAN) + // Partitioning support. 
If you specify a partition column we will use that instead of + // readQuery + .addNullableField("partitionColumn", FieldType.STRING) + .addNullableField("partitions", FieldType.INT16) .build(); } @@ -110,26 +114,49 @@ public PTransform> buildReader() { return new PTransform>() { @Override public PCollection expand(PBegin input) { - @Nullable String readQuery = config.getString("readQuery"); - if (readQuery == null) { - readQuery = String.format("SELECT * FROM %s", location); - } - - JdbcIO.ReadRows readRows = - JdbcIO.readRows() - .withDataSourceConfiguration(getDataSourceConfiguration()) - .withQuery(readQuery); - - @Nullable Short fetchSize = config.getInt16("fetchSize"); - if (fetchSize != null) { - readRows = readRows.withFetchSize(fetchSize); - } - @Nullable Boolean outputParallelization = config.getBoolean("outputParallelization"); - if (outputParallelization != null) { - readRows = readRows.withOutputParallelization(outputParallelization); + // If we define a partition column we need to go a different route + @Nullable + String partitionColumn = + config.getSchema().hasField("partitionColumn") + ? config.getString("partitionColumn") + : null; + if (partitionColumn != null) { + JdbcIO.ReadWithPartitions readRows = + JdbcIO.readWithPartitions() + .withDataSourceConfiguration(getDataSourceConfiguration()) + .withTable(location) + .withPartitionColumn(partitionColumn) + .withRowOutput(); + @Nullable Short partitions = config.getInt16("partitions"); + if (partitions != null) { + readRows = readRows.withNumPartitions(partitions); + } + return input.apply(readRows); + } else { + + @Nullable String readQuery = config.getString("readQuery"); + if (readQuery == null) { + readQuery = String.format("SELECT * FROM %s", location); + } + + JdbcIO.ReadRows readRows = + JdbcIO.readRows() + .withDataSourceConfiguration(getDataSourceConfiguration()) + .withQuery(readQuery); + + @Nullable Short fetchSize = config.getInt16("fetchSize"); + if (fetchSize != null) { + readRows = readRows.withFetchSize(fetchSize); + } + + @Nullable Boolean outputParallelization = config.getBoolean("outputParallelization"); + if (outputParallelization != null) { + readRows = readRows.withOutputParallelization(outputParallelization); + } + + return input.apply(readRows); } - return input.apply(readRows); } }; } diff --git a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProviderTest.java b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProviderTest.java new file mode 100644 index 000000000000..d91eaaef6e62 --- /dev/null +++ b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcSchemaIOProviderTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
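The `partitionColumn` and `partitions` fields added to the provider's configuration schema above are what the Python `ReadFromJdbc` wrapper (extended later in this patch with `partition_column` and `partitions`) forwards through the expansion service. A minimal sketch of a partitioned read from the Python side, assuming a reachable database; the table name, connection settings, and `id` column below are placeholders:

```python
import apache_beam as beam
from apache_beam.io.jdbc import ReadFromJdbc

with beam.Pipeline() as p:
    rows = (
        p
        | 'PartitionedJdbcRead' >> ReadFromJdbc(
            table_name='example_table',  # placeholder table
            driver_class_name='org.postgresql.Driver',
            jdbc_url='jdbc:postgresql://localhost:5432/example',  # placeholder URL
            username='user',
            password='pass',
            partition_column='id',  # numeric/datetime column used to split the read
            partitions=4))          # overrides the default number of splits
    _ = rows | beam.Map(print)
```

When `partition_column` is set, the provider builds a `JdbcIO.readWithPartitions()` transform against the table instead of issuing a single `readQuery`; running the Python wrapper also requires a Java expansion service, which falls back to `default_io_expansion_service` when none is supplied.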
+ */ +package org.apache.beam.sdk.io.jdbc; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import javax.sql.DataSource; +import org.apache.beam.sdk.io.common.DatabaseTestHelper; +import org.apache.beam.sdk.io.common.TestRow; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Count; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class JdbcSchemaIOProviderTest { + + private static final JdbcIO.DataSourceConfiguration DATA_SOURCE_CONFIGURATION = + JdbcIO.DataSourceConfiguration.create( + "org.apache.derby.jdbc.EmbeddedDriver", "jdbc:derby:memory:testDB;create=true"); + private static final int EXPECTED_ROW_COUNT = 1000; + + private static final DataSource DATA_SOURCE = DATA_SOURCE_CONFIGURATION.buildDatasource(); + private static final String READ_TABLE_NAME = DatabaseTestHelper.getTestTableName("UT_READ"); + + @Rule public final transient TestPipeline pipeline = TestPipeline.create(); + + @BeforeClass + public static void beforeClass() throws Exception { + // by default, derby uses a lock timeout of 60 seconds. In order to speed up the test + // and detect the lock faster, we decrease this timeout + System.setProperty("derby.locks.waitTimeout", "2"); + System.setProperty("derby.stream.error.file", "build/derby.log"); + + DatabaseTestHelper.createTable(DATA_SOURCE, READ_TABLE_NAME); + addInitialData(DATA_SOURCE, READ_TABLE_NAME); + } + + @Test + public void testPartitionedRead() { + JdbcSchemaIOProvider provider = new JdbcSchemaIOProvider(); + + Row config = + Row.withSchema(provider.configurationSchema()) + .withFieldValue("driverClassName", DATA_SOURCE_CONFIGURATION.getDriverClassName().get()) + .withFieldValue("jdbcUrl", DATA_SOURCE_CONFIGURATION.getUrl().get()) + .withFieldValue("username", "") + .withFieldValue("password", "") + .withFieldValue("partitionColumn", "id") + .withFieldValue("partitions", (short) 10) + .build(); + JdbcSchemaIOProvider.JdbcSchemaIO schemaIO = + provider.from(READ_TABLE_NAME, config, Schema.builder().build()); + PCollection output = pipeline.apply(schemaIO.buildReader()); + Long expected = Long.valueOf(EXPECTED_ROW_COUNT); + PAssert.that(output.apply(Count.globally())).containsInAnyOrder(expected); + pipeline.run(); + } + + // This test shouldn't work because we only support numeric and datetime columns and we are trying + // to use a string + // column as our partition source + @Test + public void testPartitionedReadThatShouldntWork() throws Exception { + JdbcSchemaIOProvider provider = new JdbcSchemaIOProvider(); + + Row config = + Row.withSchema(provider.configurationSchema()) + .withFieldValue("driverClassName", DATA_SOURCE_CONFIGURATION.getDriverClassName().get()) + .withFieldValue("jdbcUrl", DATA_SOURCE_CONFIGURATION.getUrl().get()) + .withFieldValue("username", "") + .withFieldValue("password", "") + .withFieldValue("partitionColumn", "name") + .withFieldValue("partitions", (short) 10) + .build(); + JdbcSchemaIOProvider.JdbcSchemaIO schemaIO = + provider.from(READ_TABLE_NAME, config, Schema.builder().build()); + PCollection output = pipeline.apply(schemaIO.buildReader()); + Long expected = Long.valueOf(EXPECTED_ROW_COUNT); + 
PAssert.that(output.apply(Count.globally())).containsInAnyOrder(expected); + try { + pipeline.run(); + } catch (Exception e) { + e.printStackTrace(); + return; + } + throw new Exception("Did not throw an exception"); + } + + /** Create test data that is consistent with that generated by TestRow. */ + private static void addInitialData(DataSource dataSource, String tableName) throws SQLException { + try (Connection connection = dataSource.getConnection()) { + connection.setAutoCommit(false); + try (PreparedStatement preparedStatement = + connection.prepareStatement(String.format("insert into %s values (?,?)", tableName))) { + for (int i = 0; i < EXPECTED_ROW_COUNT; i++) { + preparedStatement.clearParameters(); + preparedStatement.setInt(1, i); + preparedStatement.setString(2, TestRow.getNameForSeed(i)); + preparedStatement.executeUpdate(); + } + } + connection.commit(); + } + } +} diff --git a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/SchemaUtilTest.java b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/SchemaUtilTest.java index 9d0770e2704d..080a451d706f 100644 --- a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/SchemaUtilTest.java +++ b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/SchemaUtilTest.java @@ -37,8 +37,8 @@ import java.sql.Types; import java.time.ZoneId; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; import org.joda.time.DateTime; diff --git a/sdks/java/io/kafka/build.gradle b/sdks/java/io/kafka/build.gradle index 8d64cf0bf971..9e3fe0f7341f 100644 --- a/sdks/java/io/kafka/build.gradle +++ b/sdks/java/io/kafka/build.gradle @@ -59,6 +59,7 @@ dependencies { provided library.java.jackson_dataformat_csv permitUnusedDeclared library.java.jackson_dataformat_csv implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":sdks:java:extensions:avro") implementation project(":runners:core-construction-java") implementation project(":sdks:java:expansion-service") permitUnusedDeclared project(":sdks:java:expansion-service") // BEAM-11761 @@ -90,6 +91,7 @@ dependencies { provided library.java.everit_json_schema testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") testImplementation project(":sdks:java:io:synthetic") + testImplementation project(":sdks:java:extensions:avro") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration") // For testing Cross-language transforms diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProvider.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProvider.java index 68a0b0522e9f..85c93c863ad2 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProvider.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProvider.java @@ -31,9 +31,9 @@ import org.apache.avro.Schema; import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; -import 
org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; import org.apache.kafka.common.serialization.Deserializer; diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaCheckpointMark.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaCheckpointMark.java index 0b34262864aa..8e34cfee3d29 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaCheckpointMark.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaCheckpointMark.java @@ -21,8 +21,8 @@ import java.util.List; import java.util.Optional; import org.apache.avro.reflect.AvroIgnore; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.UnboundedSource; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner; diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index 26c895ddc89f..9bfd4723f6c8 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -44,7 +44,6 @@ import org.apache.beam.sdk.annotations.Experimental.Kind; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderRegistry; @@ -54,6 +53,7 @@ import org.apache.beam.sdk.coders.VarLongCoder; import org.apache.beam.sdk.coders.VoidCoder; import org.apache.beam.sdk.expansion.ExternalTransformRegistrar; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.Read.Unbounded; import org.apache.beam.sdk.io.UnboundedSource; import org.apache.beam.sdk.io.UnboundedSource.CheckpointMark; diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java index 89c8d986ac1b..86d7c763206c 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java @@ -21,13 +21,13 @@ import java.util.List; import java.util.Objects; import org.apache.avro.generic.GenericRecord; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.transforms.Convert; import org.apache.beam.sdk.schemas.transforms.SchemaTransform; import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import 
org.apache.beam.sdk.schemas.utils.JsonUtils; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaUnboundedSource.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaUnboundedSource.java index d35e1c60f5ee..4af13bbf4749 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaUnboundedSource.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaUnboundedSource.java @@ -24,8 +24,8 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.UnboundedSource; import org.apache.beam.sdk.io.kafka.KafkaIO.Read; import org.apache.beam.sdk.options.PipelineOptions; diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriteSchemaTransformProvider.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriteSchemaTransformProvider.java index 91dd6538e0ed..af7211f6a221 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriteSchemaTransformProvider.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriteSchemaTransformProvider.java @@ -26,13 +26,13 @@ import java.util.Map; import java.util.Set; import javax.annotation.Nullable; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.AutoValueSchema; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; import org.apache.beam.sdk.schemas.transforms.SchemaTransform; import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.utils.JsonUtils; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProviderTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProviderTest.java index 4f99fac88680..ee276ae88489 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProviderTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ConfluentSchemaRegistryDeserializerProviderTest.java @@ -29,8 +29,8 @@ import java.util.Map; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.kafka.common.serialization.Serializer; import org.junit.Test; diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 8f6dc10a95da..a1d5baf49267 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -40,6 +40,7 @@ import 
org.apache.beam.sdk.PipelineResult; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.NullableCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.io.common.HashingFn; @@ -56,7 +57,6 @@ import org.apache.beam.sdk.options.StreamingOptions; import org.apache.beam.sdk.options.Validation; import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.schemas.utils.JsonUtils; import org.apache.beam.sdk.testing.ExpectedLogs; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/io/parquet/build.gradle b/sdks/java/io/parquet/build.gradle index 2c46a41120f8..84d10d2be175 100644 --- a/sdks/java/io/parquet/build.gradle +++ b/sdks/java/io/parquet/build.gradle @@ -40,6 +40,7 @@ def parquet_version = "1.12.0" dependencies { implementation library.java.vendored_guava_26_0_jre implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:io:hadoop-common") implementation library.java.slf4j_api implementation "org.apache.parquet:parquet-avro:$parquet_version" @@ -52,6 +53,7 @@ dependencies { provided library.java.hadoop_common testImplementation library.java.hadoop_client testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro") testImplementation library.java.junit testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") diff --git a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java index 433a53a20fe1..8a675e2c20d2 100644 --- a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java +++ b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java @@ -39,18 +39,18 @@ import org.apache.avro.specific.SpecificData; import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.CannotProvideCoderException; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderRegistry; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.FileIO.ReadableFile; import org.apache.beam.sdk.io.hadoop.SerializableConfiguration; import org.apache.beam.sdk.io.parquet.ParquetIO.ReadFiles.SplitReadFn; import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.options.ValueProvider; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; diff --git a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java index 6dd67e3e511c..7ee3ec5050fd 100644 --- a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java +++ 
b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java @@ -40,12 +40,12 @@ import org.apache.avro.io.EncoderFactory; import org.apache.avro.io.JsonEncoder; import org.apache.avro.reflect.ReflectData; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.parquet.ParquetIO.GenericRecordPassthroughFn; import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.schemas.SchemaCoder; -import org.apache.beam.sdk.schemas.utils.AvroUtils; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; diff --git a/sdks/java/io/snowflake/build.gradle b/sdks/java/io/snowflake/build.gradle index 59115cef9695..ddb66118241f 100644 --- a/sdks/java/io/snowflake/build.gradle +++ b/sdks/java/io/snowflake/build.gradle @@ -36,6 +36,7 @@ dependencies { implementation "org.bouncycastle:bcprov-jdk15on:1.70" implementation library.java.joda_time testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration") testImplementation 'com.google.cloud:google-cloud-storage:1.102.0' diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java index 10403fda2857..d41e2032e287 100644 --- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java +++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/read/SnowflakeIOReadTest.java @@ -23,7 +23,7 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; import org.apache.beam.sdk.Pipeline.PipelineExecutionException; -import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.AvroGeneratedUser; import org.apache.beam.sdk.io.snowflake.SnowflakeIO; import org.apache.beam.sdk.io.snowflake.services.SnowflakeServices; diff --git a/sdks/java/testing/expansion-service/build.gradle b/sdks/java/testing/expansion-service/build.gradle index 241c107462e9..47b9d6df926e 100644 --- a/sdks/java/testing/expansion-service/build.gradle +++ b/sdks/java/testing/expansion-service/build.gradle @@ -31,6 +31,7 @@ dependencies { testImplementation project(path: ":sdks:java:core", configuration: "shadow") testImplementation project(":sdks:java:io:parquet") testImplementation project(":sdks:java:expansion-service") + testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testRuntimeOnly project(":sdks:java:extensions:google-cloud-platform-core") testRuntimeOnly library.java.hadoop_client } diff --git a/sdks/java/testing/expansion-service/src/test/java/org/apache/beam/sdk/testing/expansion/TestExpansionService.java b/sdks/java/testing/expansion-service/src/test/java/org/apache/beam/sdk/testing/expansion/TestExpansionService.java index ebf9f5812b13..821d0e9db9ae 
100644 --- a/sdks/java/testing/expansion-service/src/test/java/org/apache/beam/sdk/testing/expansion/TestExpansionService.java +++ b/sdks/java/testing/expansion-service/src/test/java/org/apache/beam/sdk/testing/expansion/TestExpansionService.java @@ -26,9 +26,9 @@ import org.apache.avro.generic.GenericRecord; import org.apache.beam.model.pipeline.v1.RunnerApi; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.expansion.ExternalTransformRegistrar; import org.apache.beam.sdk.expansion.service.ExpansionService; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.parquet.ParquetIO; diff --git a/sdks/java/testing/nexmark/build.gradle b/sdks/java/testing/nexmark/build.gradle index 86cf7a5ee3c7..8b989c258ed7 100644 --- a/sdks/java/testing/nexmark/build.gradle +++ b/sdks/java/testing/nexmark/build.gradle @@ -67,6 +67,7 @@ dependencies { implementation library.java.vendored_guava_26_0_jre implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(":sdks:java:io:google-cloud-platform") + implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:extensions:google-cloud-platform-core") implementation project(":sdks:java:extensions:sql") implementation project(":sdks:java:extensions:sql:zetasql") diff --git a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java index e9afc629315b..fd8563630832 100644 --- a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java +++ b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java @@ -36,7 +36,7 @@ import java.util.concurrent.ThreadLocalRandom; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.io.AvroIO; +import org.apache.beam.sdk.extensions.avro.io.AvroIO; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; diff --git a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkUtils.java b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkUtils.java index 896c3eab75e1..3d8985df3fab 100644 --- a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkUtils.java +++ b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkUtils.java @@ -31,13 +31,13 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; import org.apache.beam.sdk.coders.CoderRegistry; import org.apache.beam.sdk.coders.CustomCoder; import org.apache.beam.sdk.coders.SerializableCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.Read; diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd b/sdks/python/apache_beam/coders/coder_impl.pxd index 5714f8beeeec..0e6e31d0fc82 100644 --- a/sdks/python/apache_beam/coders/coder_impl.pxd +++ b/sdks/python/apache_beam/coders/coder_impl.pxd 
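The coder hunks below add a `BigEndianShortCoder` and matching stream methods so that schema `INT16` fields (such as the new JDBC `partitions` and `fetchSize` settings) can be encoded from Python. A minimal standalone sketch of the encoding they implement, two bytes, big-endian, signed, using only the standard library:

```python
import struct

def encode_int16_be(value: int) -> bytes:
    # Matches the '>h' packing used by the new slow_stream methods:
    # 2 bytes, big-endian, signed.
    return struct.pack('>h', value)

def decode_int16_be(data: bytes) -> int:
    return struct.unpack('>h', data)[0]

assert encode_int16_be(1) == b'\x00\x01'
assert decode_int16_be(encode_int16_be(-32768)) == -32768
assert decode_int16_be(encode_int16_be(32767)) == 32767
```

The coder advertises a fixed size of two bytes regardless of nesting, which is what the `estimate_size` override in the hunk returns.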
@@ -109,6 +109,10 @@ cdef class BooleanCoderImpl(CoderImpl): pass +cdef class BigEndianShortCoderImpl(StreamCoderImpl): + pass + + cdef class SinglePrecisionFloatCoderImpl(StreamCoderImpl): pass diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py index 094687ce68d8..cccc73662ce8 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -758,6 +758,22 @@ def estimate_size(self, unused_value, nested=False): if unused_value is not None else 0) +class BigEndianShortCoderImpl(StreamCoderImpl): + """For internal use only; no backwards-compatibility guarantees.""" + def encode_to_stream(self, value, out, nested): + # type: (int, create_OutputStream, bool) -> None + out.write_bigendian_int16(value) + + def decode_from_stream(self, in_stream, nested): + # type: (create_InputStream, bool) -> float + return in_stream.read_bigendian_int16() + + def estimate_size(self, unused_value, nested=False): + # type: (Any, bool) -> int + # A short is encoded as 2 bytes, regardless of nesting. + return 2 + + class SinglePrecisionFloatCoderImpl(StreamCoderImpl): """For internal use only; no backwards-compatibility guarantees.""" def encode_to_stream(self, value, out, nested): @@ -770,7 +786,7 @@ def decode_from_stream(self, in_stream, nested): def estimate_size(self, unused_value, nested=False): # type: (Any, bool) -> int - # A double is encoded as 8 bytes, regardless of nesting. + # A float is encoded as 4 bytes, regardless of nesting. return 4 diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index 25fabc951c55..d4ca99b80fb3 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -682,6 +682,25 @@ def __hash__(self): Coder.register_structured_urn(common_urns.coders.VARINT.urn, VarIntCoder) +class BigEndianShortCoder(FastCoder): + """A coder used for big-endian int16 values.""" + def _create_impl(self): + return coder_impl.BigEndianShortCoderImpl() + + def is_deterministic(self): + # type: () -> bool + return True + + def to_type_hint(self): + return int + + def __eq__(self, other): + return type(self) == type(other) + + def __hash__(self): + return hash(type(self)) + + class SinglePrecisionFloatCoder(FastCoder): """A coder used for single-precision floating-point values.""" def _create_impl(self): diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index a0bec891bdf1..7adb06cb2870 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -160,6 +160,7 @@ def tearDownClass(cls): coders.ListLikeCoder, coders.ProtoCoder, coders.ProtoPlusCoder, + coders.BigEndianShortCoder, coders.SinglePrecisionFloatCoder, coders.ToBytesCoder, coders.BigIntegerCoder, # tested in DecimalCoder diff --git a/sdks/python/apache_beam/coders/row_coder.py b/sdks/python/apache_beam/coders/row_coder.py index 9dd4dcd9f635..19424fa1f12b 100644 --- a/sdks/python/apache_beam/coders/row_coder.py +++ b/sdks/python/apache_beam/coders/row_coder.py @@ -22,6 +22,7 @@ from apache_beam.coders import typecoders from apache_beam.coders.coder_impl import LogicalTypeCoderImpl from apache_beam.coders.coder_impl import RowCoderImpl +from apache_beam.coders.coders import BigEndianShortCoder from apache_beam.coders.coders import BooleanCoder from apache_beam.coders.coders import BytesCoder from apache_beam.coders.coders import 
Coder @@ -153,6 +154,8 @@ def _nonnull_coder_from_type(field_type): if type_info == "atomic_type": if field_type.atomic_type in (schema_pb2.INT32, schema_pb2.INT64): return VarIntCoder() + if field_type.atomic_type == schema_pb2.INT16: + return BigEndianShortCoder() elif field_type.atomic_type == schema_pb2.FLOAT: return SinglePrecisionFloatCoder() elif field_type.atomic_type == schema_pb2.DOUBLE: diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index 11ccf7fd2e37..71a5b45d7691 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -69,6 +69,9 @@ def write_bigendian_uint64(self, v): def write_bigendian_int32(self, v): self.write(struct.pack('>i', v)) + def write_bigendian_int16(self, v): + self.write(struct.pack('>h', v)) + def write_bigendian_double(self, v): self.write(struct.pack('>d', v)) @@ -172,6 +175,9 @@ def read_bigendian_uint64(self): def read_bigendian_int32(self): return struct.unpack('>i', self.read(4))[0] + def read_bigendian_int16(self): + return struct.unpack('>h', self.read(2))[0] + def read_bigendian_double(self): return struct.unpack('>d', self.read(8))[0] diff --git a/sdks/python/apache_beam/coders/stream.pxd b/sdks/python/apache_beam/coders/stream.pxd index fc179bb8c1b6..97d66aa089a4 100644 --- a/sdks/python/apache_beam/coders/stream.pxd +++ b/sdks/python/apache_beam/coders/stream.pxd @@ -29,6 +29,7 @@ cdef class OutputStream(object): cpdef write_bigendian_int64(self, libc.stdint.int64_t signed_v) cpdef write_bigendian_uint64(self, libc.stdint.uint64_t signed_v) cpdef write_bigendian_int32(self, libc.stdint.int32_t signed_v) + cpdef write_bigendian_int16(self, libc.stdint.int16_t signed_v) cpdef write_bigendian_double(self, double d) cpdef write_bigendian_float(self, float d) @@ -46,6 +47,7 @@ cdef class ByteCountingOutputStream(OutputStream): cpdef write_bigendian_int64(self, libc.stdint.int64_t val) cpdef write_bigendian_uint64(self, libc.stdint.uint64_t val) cpdef write_bigendian_int32(self, libc.stdint.int32_t val) + cpdef write_bigendian_int16(self, libc.stdint.int16_t val) cpdef size_t get_count(self) cpdef bytes get(self) @@ -62,6 +64,7 @@ cdef class InputStream(object): cpdef libc.stdint.int64_t read_bigendian_int64(self) except? -1 cpdef libc.stdint.uint64_t read_bigendian_uint64(self) except? -1 cpdef libc.stdint.int32_t read_bigendian_int32(self) except? -1 + cpdef libc.stdint.int16_t read_bigendian_int16(self) except? -1 cpdef double read_bigendian_double(self) except? -1 cpdef float read_bigendian_float(self) except? 
-1 cpdef bytes read_all(self, bint nested=*) diff --git a/sdks/python/apache_beam/coders/stream.pyx b/sdks/python/apache_beam/coders/stream.pyx index 14536b007cc8..8f941c151bde 100644 --- a/sdks/python/apache_beam/coders/stream.pyx +++ b/sdks/python/apache_beam/coders/stream.pyx @@ -101,6 +101,14 @@ cdef class OutputStream(object): self.data[self.pos + 3] = (v ) self.pos += 4 + cpdef write_bigendian_int16(self, libc.stdint.int16_t signed_v): + cdef libc.stdint.uint16_t v = signed_v + if self.buffer_size < self.pos + 2: + self.extend(2) + self.data[self.pos ] = (v >> 8) + self.data[self.pos + 1] = (v ) + self.pos += 2 + cpdef write_bigendian_double(self, double d): self.write_bigendian_int64((&d)[0]) @@ -157,6 +165,9 @@ cdef class ByteCountingOutputStream(OutputStream): cpdef write_bigendian_int32(self, libc.stdint.int32_t _): self.count += 4 + cpdef write_bigendian_int16(self, libc.stdint.int16_t _): + self.count += 2 + cpdef size_t get_count(self): return self.count @@ -237,6 +248,11 @@ cdef class InputStream(object): | self.allc[self.pos - 3] << 16 | self.allc[self.pos - 4] << 24) + cpdef libc.stdint.int16_t read_bigendian_int16(self) except? -1: + self.pos += 2 + return (self.allc[self.pos - 1] + | self.allc[self.pos - 2] << 8) + cpdef double read_bigendian_double(self) except? -1: cdef libc.stdint.int64_t as_long = self.read_bigendian_int64() return (&as_long)[0] diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py index 35b64eb95813..57662056b2a0 100644 --- a/sdks/python/apache_beam/coders/stream_test.py +++ b/sdks/python/apache_beam/coders/stream_test.py @@ -139,6 +139,15 @@ def test_read_write_bigendian_int32(self): for v in values: self.assertEqual(v, in_s.read_bigendian_int32()) + def test_read_write_bigendian_int16(self): + values = 0, 1, -1, 2**15 - 1, -2**15, int(2**13 * math.pi) + out_s = self.OutputStream() + for v in values: + out_s.write_bigendian_int16(v) + in_s = self.InputStream(out_s.get()) + for v in values: + self.assertEqual(v, in_s.read_bigendian_int16()) + def test_byte_counting(self): bc_s = self.ByteCountingOutputStream() self.assertEqual(0, bc_s.get_count()) diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md index 69cd773593bd..c56f5eb8242b 100644 --- a/sdks/python/apache_beam/examples/inference/README.md +++ b/sdks/python/apache_beam/examples/inference/README.md @@ -523,3 +523,56 @@ background ... ``` Each line has a list of predicted label. + +--- +## MNIST digit classification with Tensorflow using Saved Model Weights +[`tensorflow_mnist_with_weights.py`](./tensorflow_mnist_with_weights.py) contains an implementation for a RunInference pipeline that performs image classification on handwritten digits from the [MNIST](https://en.wikipedia.org/wiki/MNIST_database) database. + +The pipeline reads rows of pixels corresponding to a digit, performs basic preprocessing(converts the input shape to 28x28), passes the pixels to the trained Tensorflow model with RunInference, and then writes the predictions to a text file. + +The model is loaded from the saved model weights. This can be done by passing a function which creates the model and setting the model type as +`ModelType.SAVED_WEIGHTS` to the `TFModelHandler`. The path to saved weights saved using `model.save_weights(path)` should be passed to the `model_path` argument. 
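To make the wiring concrete, the following sketch shows how a handler could be constructed from saved weights; `build_model` and `SAVED_WEIGHTS_DIR` are placeholders, and the model returned by `build_model` must have the same architecture as the one whose weights were saved:

```python
import tensorflow as tf
from apache_beam.ml.inference.base import KeyedModelHandler
from apache_beam.ml.inference.tensorflow_inference import ModelType
from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy

def build_model():
    # Placeholder architecture; it must match the model whose weights were
    # saved with model.save_weights(SAVED_WEIGHTS_DIR).
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

model_handler = KeyedModelHandler(
    TFModelHandlerNumpy(
        model_uri='SAVED_WEIGHTS_DIR',       # directory passed to model.save_weights()
        model_type=ModelType.SAVED_WEIGHTS,  # build the model, then load the weights
        create_model_fn=build_model))
```

With `ModelType.SAVED_WEIGHTS`, the handler defers building the model to `create_model_fn` and then loads the weights into it, so no SavedModel directory is needed.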
+ +### Dataset and model + +To run this example, you need the MNIST dataset and the saved weights of a trained model. + +1. Create a file named [`INPUT.csv`](gs://apache-beam-ml/testing/inputs/it_mnist_data.csv) that contains labels and pixels to feed into the model. Each row should have comma-separated elements. The first element is the label. All other elements are pixel values. The csv should not have column headers. The content of the file should be similar to the following example: +``` +1,0,0,0... +0,0,0,0... +1,0,0,0... +4,0,0,0... +... +``` +2. Save the weights of the trained TensorFlow model to a directory `SAVED_WEIGHTS_DIR`. + + +### Running `tensorflow_mnist_with_weights.py` + +To run the MNIST classification pipeline locally, use the following command: +```sh +python -m apache_beam.examples.inference.tensorflow_mnist_with_weights \ + --input INPUT \ + --output OUTPUT \ + --model_path SAVED_WEIGHTS_DIR +``` +For example: +```sh +python -m apache_beam.examples.inference.tensorflow_mnist_with_weights \ + --input INPUT.csv \ + --output predictions.txt \ + --model_path SAVED_WEIGHTS_DIR +``` + +This writes the output to `predictions.txt` with contents like: +``` +1,1 +4,4 +0,0 +7,7 +3,3 +5,5 +... +``` +Each line contains two comma-separated values. The first is the actual label of the digit, and the second is the predicted label of the digit. diff --git a/sdks/python/apache_beam/examples/inference/tensorflow_mnist_with_weights.py b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_with_weights.py new file mode 100644 index 000000000000..ae51f8d9cdea --- /dev/null +++ b/sdks/python/apache_beam/examples/inference/tensorflow_mnist_with_weights.py @@ -0,0 +1,93 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +import logging + +import apache_beam as beam +import tensorflow as tf +from apache_beam.examples.inference.tensorflow_mnist_classification import PostProcessor +from apache_beam.examples.inference.tensorflow_mnist_classification import parse_known_args +from apache_beam.examples.inference.tensorflow_mnist_classification import process_input +from apache_beam.ml.inference.base import KeyedModelHandler +from apache_beam.ml.inference.base import RunInference +from apache_beam.ml.inference.tensorflow_inference import ModelType +from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions +from apache_beam.runners.runner import PipelineResult + + +def get_model(): + inputs = tf.keras.layers.Input(shape=(28, 28, 1)) + x = tf.keras.layers.Conv2D(32, 3, activation="relu")(inputs) + x = tf.keras.layers.Conv2D(32, 3, activation="relu")(x) + x = tf.keras.layers.MaxPooling2D(2)(x) + x = tf.keras.layers.Conv2D(64, 3, activation="relu")(x) + x = tf.keras.layers.Conv2D(64, 3, activation="relu")(x) + x = tf.keras.layers.MaxPooling2D(2)(x) + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dropout(0.2)(x) + outputs = tf.keras.layers.Dense(10, activation='softmax')(x) + model = tf.keras.Model(inputs, outputs) + return model + + +def run( + argv=None, save_main_session=True, test_pipeline=None) -> PipelineResult: + """ + Args: + argv: Command line arguments defined for this example. + save_main_session: Used for internal testing. + test_pipeline: Used for internal testing. + """ + known_args, pipeline_args = parse_known_args(argv) + pipeline_options = PipelineOptions(pipeline_args) + pipeline_options.view_as(SetupOptions).save_main_session = save_main_session + + # In this example we pass keyed inputs to RunInference transform. + # Therefore, we use KeyedModelHandler wrapper over TFModelHandlerNumpy. + model_loader = KeyedModelHandler( + TFModelHandlerNumpy( + model_uri=known_args.model_path, + model_type=ModelType.SAVED_WEIGHTS, + create_model_fn=get_model)) + + pipeline = test_pipeline + if not test_pipeline: + pipeline = beam.Pipeline(options=pipeline_options) + + label_pixel_tuple = ( + pipeline + | "ReadFromInput" >> beam.io.ReadFromText(known_args.input) + | "PreProcessInputs" >> beam.Map(process_input)) + + predictions = ( + label_pixel_tuple + | "RunInference" >> RunInference(model_loader) + | "PostProcessOutputs" >> beam.ParDo(PostProcessor())) + + _ = predictions | "WriteOutput" >> beam.io.WriteToText( + known_args.output, shard_name_template='', append_trailing_newlines=True) + + result = pipeline.run() + result.wait_until_finish() + return result + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + run() diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py index 1dcb56c51eca..ed8745ec2ac1 100644 --- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py @@ -201,6 +201,24 @@ def test_xlang_jdbc_write_read(self, database): assert_that(result, equal_to(expected_row)) + # Try the same read using the partitioned reader code path. + # Outputs should be the same. 
+ with TestPipeline() as p: + p.not_use_test_runner_api = True + result = ( + p + | 'Partitioned read from jdbc' >> ReadFromJdbc( + table_name=table_name, + partition_column='f_id', + partitions=3, + driver_class_name=self.driver_class_name, + jdbc_url=self.jdbc_url, + username=self.username, + password=self.password, + classpath=classpath)) + + assert_that(result, equal_to(expected_row)) + # Creating a container with testcontainers sometimes raises ReadTimeout # error. In java there are 2 retries set by default. def start_db_container(self, retries, container_init): diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py index 2be5d06a0264..08160ebd693c 100644 --- a/sdks/python/apache_beam/io/fileio.py +++ b/sdks/python/apache_beam/io/fileio.py @@ -456,7 +456,10 @@ def _format_shard( return format.format(**kwargs) -def destination_prefix_naming(suffix=None): +FileNaming = Callable[[Any, Any, int, int, Any, str, str], str] + + +def destination_prefix_naming(suffix=None) -> FileNaming: def _inner(window, pane, shard_index, total_shards, compression, destination): prefix = str(destination) return _format_shard( @@ -465,7 +468,7 @@ def _inner(window, pane, shard_index, total_shards, compression, destination): return _inner -def default_file_naming(prefix, suffix=None): +def default_file_naming(prefix, suffix=None) -> FileNaming: def _inner(window, pane, shard_index, total_shards, compression, destination): return _format_shard( window, pane, shard_index, total_shards, compression, prefix, suffix) @@ -473,7 +476,7 @@ def _inner(window, pane, shard_index, total_shards, compression, destination): return _inner -def single_file_naming(prefix, suffix=None): +def single_file_naming(prefix, suffix=None) -> FileNaming: def _inner(window, pane, shard_index, total_shards, compression, destination): assert shard_index in (0, None), shard_index assert total_shards in (1, None), total_shards diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index 424f10ab5e14..05b023b73bc6 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -2403,13 +2403,14 @@ def expand(self, input): available_runners) external_storage_write = SchemaAwareExternalTransform( - self.schematransform_config.identifier, + identifier=self.schematransform_config.identifier, expansion_service=self._expansion_service, - table=self._table, createDisposition=self._create_disposition, writeDisposition=self._write_disposition, triggeringFrequencySeconds=self._triggering_frequency, - useAtLeastOnceSemantics=self._use_at_least_once) + useAtLeastOnceSemantics=self._use_at_least_once, + table=self._table, + ) input_tag = self.schematransform_config.inputs[0] diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py index d84c332faf06..f540568b0d08 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py @@ -583,6 +583,7 @@ class BigQueryXlangStorageWriteIT(unittest.TestCase): def setUp(self): self.test_pipeline = TestPipeline(is_integration_test=True) + self.args = self.test_pipeline.get_full_options_as_args() self.project = self.test_pipeline.get_option('project') self.bigquery_client = BigQueryWrapper() @@ -622,7 +623,7 @@ def tearDown(self): self.dataset_id, self.project) - @pytest.mark.uses_java_expansion_service + @pytest.mark.uses_gcp_java_expansion_service def 
test_xlang_storage_write(self): table_id = '{}:{}.python_xlang_storage_write'.format( self.project, self.dataset_id) @@ -633,11 +634,12 @@ def test_xlang_storage_write(self): '{}.python_xlang_storage_write'.format(self.dataset_id), data=self.expected_elements) - with beam.Pipeline() as p: + with beam.Pipeline(argv=self.args) as p: _ = ( p | beam.Create(self.row_elements) - | beam.io.StorageWriteToBigQuery(table=table_id)) + | beam.io.StorageWriteToBigQuery( + table=table_id, expansion_service=self.expansion_service)) hamcrest_assert(p, bq_matcher) diff --git a/sdks/python/apache_beam/io/jdbc.py b/sdks/python/apache_beam/io/jdbc.py index 85b80fdea0e4..aa539871601d 100644 --- a/sdks/python/apache_beam/io/jdbc.py +++ b/sdks/python/apache_beam/io/jdbc.py @@ -88,6 +88,8 @@ import typing +import numpy as np + from apache_beam.coders import RowCoder from apache_beam.transforms.external import BeamJarExpansionService from apache_beam.transforms.external import ExternalTransform @@ -113,19 +115,16 @@ def default_io_expansion_service(classpath=None): Config = typing.NamedTuple( 'Config', - [ - ('driver_class_name', str), - ('jdbc_url', str), - ('username', str), - ('password', str), - ('connection_properties', typing.Optional[str]), - ('connection_init_sqls', typing.Optional[typing.List[str]]), - ('read_query', typing.Optional[str]), - ('write_statement', typing.Optional[str]), - ('fetch_size', typing.Optional[int]), - ('output_parallelization', typing.Optional[bool]), - ('autosharding', typing.Optional[bool]), - ], + [('driver_class_name', str), ('jdbc_url', str), ('username', str), + ('password', str), ('connection_properties', typing.Optional[str]), + ('connection_init_sqls', typing.Optional[typing.List[str]]), + ('read_query', typing.Optional[str]), + ('write_statement', typing.Optional[str]), + ('fetch_size', typing.Optional[int]), + ('output_parallelization', typing.Optional[bool]), + ('autosharding', typing.Optional[bool]), + ('partition_column', typing.Optional[str]), + ('partitions', typing.Optional[np.int16])], ) DEFAULT_JDBC_CLASSPATH = ['org.postgresql:postgresql:42.2.16'] @@ -226,7 +225,8 @@ def __init__( fetch_size=None, output_parallelization=None, autosharding=autosharding, - ))), + partitions=None, + partition_column=None))), ), expansion_service or default_io_expansion_service(classpath), ) @@ -273,6 +273,8 @@ def __init__( query=None, output_parallelization=None, fetch_size=None, + partition_column=None, + partitions=None, connection_properties=None, connection_init_sqls=None, expansion_service=None, @@ -288,6 +290,10 @@ def __init__( :param query: sql query to be executed :param output_parallelization: is output parallelization on :param fetch_size: how many rows to fetch + :param partition_column: enable partitioned reads by splitting on this + column + :param partitions: override the default number of splits when using + partition_column :param connection_properties: properties of the jdbc connection passed as string with format [propertyName=property;]* @@ -324,7 +330,8 @@ def __init__( fetch_size=fetch_size, output_parallelization=output_parallelization, autosharding=None, - ))), + partition_column=partition_column, + partitions=partitions))), ), expansion_service or default_io_expansion_service(classpath), ) diff --git a/sdks/python/apache_beam/io/textio.py b/sdks/python/apache_beam/io/textio.py index 289c91e23b0a..abfeb9bc9799 100644 --- a/sdks/python/apache_beam/io/textio.py +++ b/sdks/python/apache_beam/io/textio.py @@ -21,6 +21,7 @@ import logging from functools import 
partial +from typing import TYPE_CHECKING from typing import Any from typing import Optional @@ -36,12 +37,19 @@ from apache_beam.transforms import PTransform from apache_beam.transforms.display import DisplayDataItem +if TYPE_CHECKING: + from apache_beam.io import fileio + __all__ = [ 'ReadFromText', 'ReadFromTextWithFilename', 'ReadAllFromText', 'ReadAllFromTextContinuously', - 'WriteToText' + 'WriteToText', + 'ReadFromCsv', + 'WriteToCsv', + 'ReadFromJson', + 'WriteToJson', ] _LOGGER = logging.getLogger(__name__) @@ -873,3 +881,159 @@ def __init__( def expand(self, pcoll): return pcoll | Write(self._sink) + + +try: + import pandas + + def append_pandas_args(src, exclude): + def append(dest): + state = None + skip = False + extra_lines = [] + for line in src.__doc__.split('\n'): + if line.strip() == 'Parameters': + indent = len(line) - len(line.lstrip()) + extra_lines = ['\n\nPandas Parameters'] + state = 'append' + continue + elif line.strip().startswith('Returns'): + break + + if state == 'append': + if skip: + if line and not line[indent:].startswith(' '): + skip = False + if any(line.strip().startswith(arg + ' : ') for arg in exclude): + skip = True + if not skip: + extra_lines.append(line[indent:]) + # Expand title underline due to Parameters -> Pandas Parameters. + extra_lines[1] += '-------' + dest.__doc__ += '\n'.join(extra_lines) + return dest + + return append + + @append_pandas_args( + pandas.read_csv, exclude=['filepath_or_buffer', 'iterator']) + def ReadFromCsv(path: str, *, splittable: bool = True, **kwargs): + """A PTransform for reading comma-separated values (csv) files into a + PCollection. + + Args: + path (str): The file path to read from. The path can contain glob + characters such as ``*`` and ``?``. + splittable (bool): Whether the csv files are splittable at line + boundaries, i.e. each line of this file represents a complete record. + This should be set to False if single records span multiple lines (e.g. + a quoted field has a newline inside of it). Setting this to false may + disable liquid sharding. + **kwargs: Extra arguments passed to `pandas.read_csv` (see below). + """ + from apache_beam.dataframe.io import ReadViaPandas + return ReadViaPandas('csv', path, splittable=splittable, **kwargs) + + @append_pandas_args( + pandas.DataFrame.to_csv, exclude=['path_or_buf', 'index', 'index_label']) + def WriteToCsv( + path: str, + num_shards: Optional[int] = None, + file_naming: Optional['fileio.FileNaming'] = None, + **kwargs): + # pylint: disable=line-too-long + + """A PTransform for writing a schema'd PCollection as a (set of) + comma-separated values (csv) files. + + Args: + path (str): The file path to write to. The files written will + begin with this prefix, followed by a shard identifier (see + `num_shards`) according to the `file_naming` parameter. + num_shards (optional int): The number of shards to use in the distributed + write. Defaults to None, letting the system choose an optimal value. + file_naming (optional callable): A file-naming strategy, determining the + actual shard names given their shard number, etc. + See the section on `file naming + `_ + Defaults to `fileio.default_file_naming`, which names files as + `path-XXXXX-of-NNNNN`. + **kwargs: Extra arguments passed to `pandas.Dataframe.to_csv` (see below). 
+ """ + from apache_beam.dataframe.io import WriteViaPandas + if num_shards is not None: + kwargs['num_shards'] = num_shards + if file_naming is not None: + kwargs['file_naming'] = file_naming + return WriteViaPandas('csv', path, index=False, **kwargs) + + @append_pandas_args(pandas.read_json, exclude=['path_or_buf']) + def ReadFromJson( + path: str, *, orient: str = 'records', lines: bool = True, **kwargs): + """A PTransform for reading json values from files into a PCollection. + + Args: + path (str): The file path to read from. The path can contain glob + characters such as ``*`` and ``?``. + orient (str): Format of the json elements in the file. + Default to 'records', meaning the file is expected to contain a list + of json objects like `{field1: value1, field2: value2, ...}`. + lines (bool): Whether each line should be considered a separate record, + as opposed to the entire file being a valid JSON object or list. + Defaults to True (unlike Pandas). + **kwargs: Extra arguments passed to `pandas.read_json` (see below). + """ + from apache_beam.dataframe.io import ReadViaPandas + return ReadViaPandas('json', path, orient=orient, lines=lines, **kwargs) + + @append_pandas_args( + pandas.DataFrame.to_json, exclude=['path_or_buf', 'index']) + def WriteToJson( + path: str, + *, + num_shards: Optional[int] = None, + file_naming: Optional['fileio.FileNaming'] = None, + orient: str = 'records', + lines: Optional[bool] = None, + **kwargs): + # pylint: disable=line-too-long + + """A PTransform for writing a PCollection as json values to files. + + Args: + path (str): The file path to write to. The files written will + begin with this prefix, followed by a shard identifier (see + `num_shards`) according to the `file_naming` parameter. + num_shards (optional int): The number of shards to use in the distributed + write. Defaults to None, letting the system choose an optimal value. + file_naming (optional callable): A file-naming strategy, determining the + actual shard names given their shard number, etc. + See the section on `file naming + `_ + Defaults to `fileio.default_file_naming`, which names files as + `path-XXXXX-of-NNNNN`. + orient (str): Format of the json elements in the file. + Default to 'records', meaning the file will to contain a list + of json objects like `{field1: value1, field2: value2, ...}`. + lines (bool): Whether each line should be considered a separate record, + as opposed to the entire file being a valid JSON object or list. + Defaults to True if orient is 'records' (unlike Pandas). + **kwargs: Extra arguments passed to `pandas.Dataframe.to_json` + (see below). 
+ """ + from apache_beam.dataframe.io import WriteViaPandas + if num_shards is not None: + kwargs['num_shards'] = num_shards + if file_naming is not None: + kwargs['file_naming'] = file_naming + if lines is None: + lines = orient == 'records' + return WriteViaPandas('json', path, orient=orient, lines=lines, **kwargs) + +except ImportError: + + def no_pandas(*args, **kwargs): + raise ImportError('Please install apache_beam[dataframe]') + + for transform in ('ReadFromCsv', 'WriteToCsv', 'ReadFromJson', 'WriteToJson'): + globals()[transform] = no_pandas diff --git a/sdks/python/apache_beam/io/textio_test.py b/sdks/python/apache_beam/io/textio_test.py index 6fb8d6ccb362..1d852d171324 100644 --- a/sdks/python/apache_beam/io/textio_test.py +++ b/sdks/python/apache_beam/io/textio_test.py @@ -1711,6 +1711,39 @@ def test_write_max_bytes_per_shard(self): self.assertEqual(sorted(read_result), sorted(lines)) +class CsvTest(unittest.TestCase): + def test_csv_read_write(self): + records = [beam.Row(a='str', b=ix) for ix in range(3)] + with tempfile.TemporaryDirectory() as dest: + with TestPipeline() as p: + # pylint: disable=expression-not-assigned + p | beam.Create(records) | beam.io.WriteToCsv(os.path.join(dest, 'out')) + with TestPipeline() as p: + pcoll = ( + p + | beam.io.ReadFromCsv(os.path.join(dest, 'out*')) + | beam.Map(lambda t: beam.Row(**dict(zip(type(t)._fields, t))))) + + assert_that(pcoll, equal_to(records)) + + +class JsonTest(unittest.TestCase): + def test_json_read_write(self): + records = [beam.Row(a='str', b=ix) for ix in range(3)] + with tempfile.TemporaryDirectory() as dest: + with TestPipeline() as p: + # pylint: disable=expression-not-assigned + p | beam.Create(records) | beam.io.WriteToJson( + os.path.join(dest, 'out')) + with TestPipeline() as p: + pcoll = ( + p + | beam.io.ReadFromJson(os.path.join(dest, 'out*')) + | beam.Map(lambda t: beam.Row(**dict(zip(type(t)._fields, t))))) + + assert_that(pcoll, equal_to(records)) + + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py index ee33c53cadb0..dcebb9347ed9 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference.py @@ -52,6 +52,7 @@ class ModelType(enum.Enum): """Defines how a model file should be loaded.""" SAVED_MODEL = 1 + SAVED_WEIGHTS = 2 def _load_model(model_uri, model_type): @@ -61,6 +62,12 @@ def _load_model(model_uri, model_type): raise AssertionError('Unsupported model type for loading.') +def _load_model_from_weights(create_model_fn, weights_path): + model = create_model_fn() + model.load_weights(weights_path) + return model + + def default_numpy_inference_fn( model: tf.Module, batch: Sequence[numpy.ndarray], @@ -88,6 +95,7 @@ def __init__( self, model_uri: str, model_type: ModelType = ModelType.SAVED_MODEL, + create_model_fn: Optional[Callable] = None, *, inference_fn: TensorInferenceFn = default_numpy_inference_fn): """Implementation of the ModelHandler interface for Tensorflow. @@ -101,6 +109,9 @@ def __init__( Args: model_uri (str): path to the trained model. model_type: type of model to be loaded. Defaults to SAVED_MODEL. + create_model_fn: a function that creates and returns a new + tensorflow model to load the saved weights. + It should be used with ModelType.SAVED_WEIGHTS. inference_fn: inference function to use during RunInference. 
Defaults to default_numpy_inference_fn. @@ -110,9 +121,16 @@ def __init__( self._model_uri = model_uri self._model_type = model_type self._inference_fn = inference_fn + self._create_model_fn = create_model_fn def load_model(self) -> tf.Module: """Loads and initializes a Tensorflow model for processing.""" + if self._model_type == ModelType.SAVED_WEIGHTS: + if not self._create_model_fn: + raise ValueError( + "Callable create_model_fn must be passed" + "with ModelType.SAVED_WEIGHTS") + return _load_model_from_weights(self._create_model_fn, self._model_uri) return _load_model(self._model_uri, self._model_type) def update_model_path(self, model_path: Optional[str] = None): @@ -169,6 +187,7 @@ def __init__( self, model_uri: str, model_type: ModelType = ModelType.SAVED_MODEL, + create_model_fn: Optional[Callable] = None, *, inference_fn: TensorInferenceFn = default_tensor_inference_fn): """Implementation of the ModelHandler interface for Tensorflow. @@ -183,6 +202,9 @@ def __init__( model_uri (str): path to the trained model. model_type: type of model to be loaded. Defaults to SAVED_MODEL. + create_model_fn: a function that creates and returns a new + tensorflow model to load the saved weights. + It should be used with ModelType.SAVED_WEIGHTS. inference_fn: inference function to use during RunInference. Defaults to default_numpy_inference_fn. @@ -192,9 +214,16 @@ def __init__( self._model_uri = model_uri self._model_type = model_type self._inference_fn = inference_fn + self._create_model_fn = create_model_fn def load_model(self) -> tf.Module: """Loads and initializes a tensorflow model for processing.""" + if self._model_type == ModelType.SAVED_WEIGHTS: + if not self._create_model_fn: + raise ValueError( + "Callable create_model_fn must be passed" + "with ModelType.SAVED_WEIGHTS") + return _load_model_from_weights(self._create_model_fn, self._model_uri) return _load_model(self._model_uri, self._model_type) def update_model_path(self, model_path: Optional[str] = None): diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index 7b4b13ce2e1e..fb1a2964841b 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -31,6 +31,7 @@ import tensorflow as tf from apache_beam.examples.inference import tensorflow_imagenet_segmentation from apache_beam.examples.inference import tensorflow_mnist_classification + from apache_beam.examples.inference import tensorflow_mnist_with_weights except ImportError as e: tf = None @@ -51,9 +52,9 @@ class TensorflowInference(unittest.TestCase): def test_tf_mnist_classification(self): test_pipeline = TestPipeline(is_integration_test=True) input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv' - output_file_dir = 'apache-beam-ml/testing/outputs' + output_file_dir = 'gs://apache-beam-ml/testing/outputs' output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) - model_path = 'apache-beam-ml/models/tensorflow/mnist/' + model_path = 'gs://apache-beam-ml/models/tensorflow/mnist/' extra_opts = { 'input': input_file, 'output': output_file, @@ -85,7 +86,7 @@ def test_tf_imagenet_image_segmentation(self): image_dir = ( 'https://storage.googleapis.com/download.tensorflow.org/example_images/' ) - output_file_dir = 'apache-beam-ml/testing/outputs' + output_file_dir = 'gs://apache-beam-ml/testing/outputs' output_file = '/'.join([output_file_dir, 
str(uuid.uuid4()), 'result.txt']) model_path = ( 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4') @@ -108,6 +109,36 @@ def test_tf_imagenet_image_segmentation(self): for true_label, predicted_label in zip(expected_outputs, predicted_outputs): self.assertEqual(true_label, predicted_label) + def test_tf_mnist_with_weights_classification(self): + test_pipeline = TestPipeline(is_integration_test=True) + input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv' + output_file_dir = 'gs://apache-beam-ml/testing/outputs' + output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) + model_path = 'gs://apache-beam-ml/models/tensorflow/mnist' + extra_opts = { + 'input': input_file, + 'output': output_file, + 'model_path': model_path, + } + tensorflow_mnist_with_weights.run( + test_pipeline.get_full_options_as_args(**extra_opts), + save_main_session=False) + self.assertEqual(FileSystems().exists(output_file), True) + + expected_output_filepath = 'gs://apache-beam-ml/testing/expected_outputs/test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long + expected_outputs = process_outputs(expected_output_filepath) + predicted_outputs = process_outputs(output_file) + self.assertEqual(len(expected_outputs), len(predicted_outputs)) + + predictions_dict = {} + for i in range(len(predicted_outputs)): + true_label, prediction = predicted_outputs[i].split(',') + predictions_dict[true_label] = prediction + + for i in range(len(expected_outputs)): + true_label, expected_prediction = expected_outputs[i].split(',') + self.assertEqual(predictions_dict[true_label], expected_prediction) + if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 37bdb106038e..ee0b1095fa28 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1463,7 +1463,7 @@ def _add_argparse_args(cls, parser): class FlinkRunnerOptions(PipelineOptions): # These should stay in sync with gradle.properties. - PUBLISHED_FLINK_VERSIONS = ['1.12', '1.13', '1.14', '1.15'] + PUBLISHED_FLINK_VERSIONS = ['1.12', '1.13', '1.14', '1.15', '1.16'] @classmethod def _add_argparse_args(cls, parser): diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py index 0ba907308303..e0944d81d1b2 100644 --- a/sdks/python/apache_beam/pipeline.py +++ b/sdks/python/apache_beam/pipeline.py @@ -1013,7 +1013,7 @@ class PipelineVisitor(object): """For internal use only; no backwards-compatibility guarantees. Visitor pattern class used to traverse a DAG of transforms - (used internally by Pipeline for bookeeping purposes). + (used internally by Pipeline for bookkeeping purposes). 
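For the weights-based loading added to the TensorFlow model handlers above, a pipeline might wire it up as sketched below. The Keras architecture and the weights path are placeholders, and the numpy handler class from the `tensorflow_inference` module is assumed:

```python
# Sketch: RunInference with weights-only loading (architecture and paths are
# placeholders, not part of the patch).
import apache_beam as beam
import numpy
import tensorflow as tf
from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.inference.tensorflow_inference import ModelType
from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy


def build_model():
  # Must recreate the same architecture the weights were saved from.
  return tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(784, ))])


handler = TFModelHandlerNumpy(
    model_uri='gs://my-bucket/mnist/weights',  # hypothetical weights location
    model_type=ModelType.SAVED_WEIGHTS,
    create_model_fn=build_model)  # required when using SAVED_WEIGHTS

with beam.Pipeline() as p:
  _ = (
      p
      | beam.Create([numpy.zeros(784, dtype=numpy.float32)])
      | RunInference(handler)
      | beam.Map(print))
```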
""" def visit_value(self, value, producer_node): # type: (pvalue.PValue, AppliedPTransform) -> None diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py b/sdks/python/apache_beam/runners/direct/transform_evaluator.py index a0600255028a..bfb27c4adc00 100644 --- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py +++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py @@ -678,7 +678,10 @@ def _get_element(message): else: if message.publish_time is None: raise ValueError('No publish time present in message: %s' % message) - timestamp = Timestamp.from_utc_datetime(message.publish_time) + try: + timestamp = Timestamp.from_utc_datetime(message.publish_time) + except ValueError as e: + raise ValueError('Bad timestamp value for message %s: %s', message, e) return timestamp, parsed_message diff --git a/sdks/python/apache_beam/utils/timestamp.py b/sdks/python/apache_beam/utils/timestamp.py index 502d1f78fa7a..68a40a69d0f4 100644 --- a/sdks/python/apache_beam/utils/timestamp.py +++ b/sdks/python/apache_beam/utils/timestamp.py @@ -103,6 +103,11 @@ def from_utc_datetime(cls, dt): Args: dt: A ``datetime.datetime`` object in UTC (offset-aware). """ + if dt.tzinfo is None: + raise ValueError( + "dt has no timezone info " + + "(https://docs.python.org/3/library/datetime.html" + + "#aware-and-naive-objects): %s" % dt) if dt.tzinfo != pytz.utc and dt.tzinfo != datetime.timezone.utc: raise ValueError('dt not in UTC: %s' % dt) duration = dt - cls._epoch_datetime_utc() diff --git a/sdks/python/apache_beam/utils/timestamp_test.py b/sdks/python/apache_beam/utils/timestamp_test.py index fd67b4fc082b..14a3c9651f89 100644 --- a/sdks/python/apache_beam/utils/timestamp_test.py +++ b/sdks/python/apache_beam/utils/timestamp_test.py @@ -87,7 +87,10 @@ def test_from_utc_datetime(self): datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)), Timestamp(0)) with self.assertRaisesRegex(ValueError, r'UTC'): - Timestamp.from_utc_datetime(datetime.datetime(1970, 1, 1)) + Timestamp.from_utc_datetime( + datetime.datetime(1970, 1, 1, tzinfo=pytz.timezone('US/Eastern'))) + with self.assertRaisesRegex(ValueError, r'dt has no timezone info'): + Timestamp.from_utc_datetime(datetime.datetime(1970, 1, 1, tzinfo=None)) def test_arithmetic(self): # Supported operations. 
diff --git a/sdks/python/apache_beam/version.py b/sdks/python/apache_beam/version.py index 6c6eace1dcf1..5c6991789376 100644 --- a/sdks/python/apache_beam/version.py +++ b/sdks/python/apache_beam/version.py @@ -17,4 +17,4 @@ """Apache Beam SDK version information and utilities.""" -__version__ = '2.46.0.dev' +__version__ = '2.47.0.dev' diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index a9ae893ab0cf..bb9acbacf629 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -30,8 +30,8 @@ import ( "path/filepath" "regexp" "strings" - "syscall" "sync" + "syscall" "time" "github.com/apache/beam/sdks/v2/go/pkg/beam/artifact" @@ -216,10 +216,10 @@ func launchSDKProcess() error { // Keep track of child PIDs for clean shutdown without zombies childPids := struct { - v []int + v []int canceled bool - mu sync.Mutex - } {v: make([]int, 0, len(workerIds))} + mu sync.Mutex + }{v: make([]int, 0, len(workerIds))} // Forward trapped signals to child process groups in order to terminate them gracefully and avoid zombies go func() { @@ -251,20 +251,33 @@ func launchSDKProcess() error { go func(workerId string) { defer wg.Done() - childPids.mu.Lock() - if childPids.canceled { + errorCount := 0 + for { + childPids.mu.Lock() + if childPids.canceled { + childPids.mu.Unlock() + return + } + log.Printf("Executing Python (worker %v): python %v", workerId, strings.Join(args, " ")) + cmd := StartCommandEnv(map[string]string{"WORKER_ID": workerId}, "python", args...) + childPids.v = append(childPids.v, cmd.Process.Pid) childPids.mu.Unlock() - return - } - log.Printf("Executing Python (worker %v): python %v", workerId, strings.Join(args, " ")) - cmd := StartCommandEnv(map[string]string{"WORKER_ID": workerId}, "python", args...) - childPids.v = append(childPids.v, cmd.Process.Pid) - childPids.mu.Unlock() - - if err := cmd.Wait(); err != nil { - log.Printf("Python (worker %v) exited: %v", workerId, err) - } else { - log.Printf("Python (worker %v) exited.", workerId) + + if err := cmd.Wait(); err != nil { + // Retry on fatal errors, like OOMs and segfaults, not just + // DoFns throwing exceptions. 
+ errorCount += 1 + if errorCount < 4 { + log.Printf("Python (worker %v) exited %v times: %v\nrestarting SDK process", + workerId, errorCount, err) + } else { + log.Fatalf("Python (worker %v) exited %v times: %v\nout of retries, failing container", + workerId, errorCount, err) + } + } else { + log.Printf("Python (worker %v) exited.", workerId) + break + } } }(workerId) } @@ -297,7 +310,7 @@ func StartCommandEnv(env map[string]string, prog string, args ...string) *exec.C func setupVenv(baseDir, workerId string) (string, error) { log.Printf("Initializing temporary Python venv ...") - dir := filepath.Join(baseDir, "beam-venv-worker-" + workerId) + dir := filepath.Join(baseDir, "beam-venv-worker-"+workerId) if _, err := os.Stat(dir); !os.IsNotExist(err) { // Probably leftovers from a previous run log.Printf("Cleaning up previous venv ...") diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index 2733b2511d05..15e37386d4f3 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -27,6 +27,7 @@ python_functions = python_files = test_*.py *_test.py *_test_py3*.py *_test_it.py markers = + uses_gcp_java_expansion_service: collect Cross Language GCP Java transforms test runs uses_java_expansion_service: collect Cross Language Java transforms test runs uses_python_expansion_service: collect Cross Language Python transforms test runs xlang_sql_expansion_service: collect for Cross Language with SQL expansion service test runs diff --git a/sdks/python/scripts/generate_pydoc.sh b/sdks/python/scripts/generate_pydoc.sh index 51ef692ace55..79fdea2e5c5d 100755 --- a/sdks/python/scripts/generate_pydoc.sh +++ b/sdks/python/scripts/generate_pydoc.sh @@ -130,7 +130,7 @@ release = version autoclass_content = 'both' autodoc_inherit_docstrings = False autodoc_member_order = 'bysource' -autodoc_mock_imports = ["tensorrt", "cuda"] +autodoc_mock_imports = ["tensorrt", "cuda", "torch", "onnxruntime", "onnx", "tensorflow", "tensorflow_hub"] # Allow a special section for documenting DataFrame API napoleon_custom_sections = ['Differences from pandas'] @@ -253,10 +253,11 @@ EOF # Build the documentation using sphinx # Reference: http://www.sphinx-doc.org/en/stable/man/sphinx-build.html # Note we cut out warnings from apache_beam.dataframe, this package uses pandas -# documentation verbatim. +# documentation verbatim, as do some of the textio transforms. 
python $(type -p sphinx-build) -v -a -E -q target/docs/source \ target/docs/_build -c target/docs/source \ 2>&1 | grep -E -v 'apache_beam\.dataframe.*WARNING:' \ + 2>&1 | grep -E -v 'apache_beam\.io\.textio\.(ReadFrom|WriteTo)(Csv|Json).*WARNING:' \ 2>&1 | tee "target/docs/sphinx-build.log" # Fail if there are errors or warnings in docs @@ -269,6 +270,7 @@ python $(type -p sphinx-build) -v -a -E -q target/docs/source \ python -msphinx -M doctest target/docs/source \ target/docs/_build -c target/docs/source \ 2>&1 | grep -E -v 'apache_beam\.dataframe.*WARNING:' \ + 2>&1 | grep -E -v 'apache_beam\.io\.textio\.(ReadFrom|WriteTo)(Csv|Json).*WARNING:' \ 2>&1 | tee "target/docs/sphinx-doctest.log" # Fail if there are errors or warnings in docs diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 7525177a5b26..9a2628504cdf 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -270,12 +270,13 @@ def get_portability_package_data(): 'Sphinx>=1.5.2,<2.0', # Pinning docutils as a workaround for Sphinx issue: # https://github.com/sphinx-doc/sphinx/issues/9727 - 'docutils==0.17.1' + 'docutils==0.17.1', + 'pandas<2.0.0', ], 'test': [ 'freezegun>=0.3.12', 'joblib>=1.0.1', - 'mock>=1.0.1,<3.0.0', + 'mock>=1.0.1,<6.0.0', 'pandas<2.0.0', 'parameterized>=0.7.1,<0.9.0', 'pyhamcrest>=1.9,!=1.10.0,<2.0.0', diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 20e7e345c320..1b486053e3fd 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -17,6 +17,7 @@ */ evaluationDependsOn(':runners:google-cloud-dataflow-java:worker') +evaluationDependsOn(':sdks:python:test-suites:xlang') enablePythonPerformanceTest() String pythonVersionSuffix = project.ext.pythonVersion @@ -407,3 +408,25 @@ project.tasks.register("inferencePostCommitIT") { 'tensorRTtests', ] } + +// Create cross-language tasks for running tests against Java expansion service(s) +def dataflowProject = project.findProperty('dataflowProject') ?: 'apache-beam-testing' +def dataflowRegion = project.findProperty('dataflowRegion') ?: 'us-central1' + +project(":sdks:python:test-suites:xlang").ext.xlangTasks.each { taskMetadata -> + createCrossLanguageUsingJavaExpansionTask( + name: taskMetadata.name, + expansionProjectPath: taskMetadata.expansionProjectPath, + collectMarker: taskMetadata.collectMarker, + startJobServer: taskMetadata.startJobServer, + cleanupJobServer: taskMetadata.cleanupJobServer, + needsSdkLocation: true, + pythonPipelineOptions: [ + "--runner=TestDataflowRunner", + "--project=${dataflowProject}", + "--region=${dataflowRegion}", + "--sdk_harness_container_image_overrides=.*java.*,gcr.io/apache-beam-testing/beam-sdk/beam_java8_sdk:latest", + ], + pytestOptions: basicPytestOpts + ) +} diff --git a/sdks/python/test-suites/dataflow/py310/build.gradle b/sdks/python/test-suites/dataflow/py310/build.gradle index 680515bcef27..e3b96494dfcd 100644 --- a/sdks/python/test-suites/dataflow/py310/build.gradle +++ b/sdks/python/test-suites/dataflow/py310/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. 
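On the test side, suites opt in to these cross-language tasks through the `uses_gcp_java_expansion_service` marker registered in pytest.ini above; a sketch of what a tagged test looks like (the test name and body are placeholders):

```python
# Sketch only: a test collected by the cross-language GCP tasks via the new
# pytest marker (test name and body are placeholders).
import pytest


@pytest.mark.uses_gcp_java_expansion_service
def test_bigquery_cross_language_write():
  # Would exercise a GCP cross-language transform against the Java
  # expansion service started by the Gradle task.
  ...
```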
pythonVersion = '3.10' apply from: "../common.gradle" -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/dataflow/py37/build.gradle b/sdks/python/test-suites/dataflow/py37/build.gradle index bdf3754a32c9..9f89c61e0a20 100644 --- a/sdks/python/test-suites/dataflow/py37/build.gradle +++ b/sdks/python/test-suites/dataflow/py37/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. pythonVersion = '3.7' apply from: "../common.gradle" -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/dataflow/py38/build.gradle b/sdks/python/test-suites/dataflow/py38/build.gradle index 62c7a0a0ae37..b3c3a5bfb8a6 100644 --- a/sdks/python/test-suites/dataflow/py38/build.gradle +++ b/sdks/python/test-suites/dataflow/py38/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. pythonVersion = '3.8' apply from: "../common.gradle" -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/dataflow/py39/build.gradle b/sdks/python/test-suites/dataflow/py39/build.gradle index 9338bff9ef66..e8e13eadaea8 100644 --- a/sdks/python/test-suites/dataflow/py39/build.gradle +++ b/sdks/python/test-suites/dataflow/py39/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. pythonVersion = '3.9' apply from: "../common.gradle" -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/dataflow/xlang/common.gradle b/sdks/python/test-suites/dataflow/xlang/common.gradle deleted file mode 100644 index 65f5d8e57c7d..000000000000 --- a/sdks/python/test-suites/dataflow/xlang/common.gradle +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -import org.apache.beam.gradle.BeamModulePlugin - -project.evaluationDependsOn(":sdks:python") -project.evaluationDependsOn(":runners:google-cloud-dataflow-java") -project.evaluationDependsOn(":sdks:java:io:google-cloud-platform:expansion-service") - -// Set up cross language tests -def envDir = project.project(":sdks:python").envdir -def crossLanguageTestClasspath = project.project(":runners:google-cloud-dataflow-java").sourceSets.test.runtimeClasspath -def jobPort = BeamModulePlugin.getRandomPort() -def tmpDir = System.getenv("TMPDIR") ?: System.getenv("WORKSPACE") ?: "/tmp" -def pidFile = "${tmpDir}/local_job_service_main-${jobPort}.pid" - -def setupTask = project.tasks.register("fnApiJobServerSetup", Exec) { - dependsOn ':sdks:python:installGcpTest' - - executable 'sh' - args '-c', ". 
${envDir}/bin/activate && python -m apache_beam.runners.portability.local_job_service_main --job_port ${jobPort} --pid_file ${pidFile} --background --stdout_file ${tmpDir}/beam-fnapi-job-server.log" -} - -def cleanupTask = project.tasks.register("fnApiJobServerCleanup", Exec) { - executable 'sh' - args '-c', ". ${envDir}/bin/activate && python -m apache_beam.runners.portability.local_job_service_main --pid_file ${pidFile} --stop" -} - -// Create tasks for running tests against GCP expansion service -def gcpExpansionJar = project.project(':sdks:java:io:google-cloud-platform:expansion-service').buildTestExpansionServiceJar.archivePath - -createCrossLanguageValidatesRunnerTask( - name: "gcpCrossLanguage", - expansionJar: gcpExpansionJar, - collectTestsWithDecorator: "uses_gcp_java_expansion_service", - startJobServer: setupTask, - cleanupJobServer: cleanupTask, - numParallelTests: 1, - classpath: crossLanguageTestClasspath, - pythonPipelineOptions: [ - "--runner=TestDataflowRunner", - "--job_endpoint=localhost:${jobPort}", - "--environment_cache_millis=10000", - "--experiments=beam_fn_api", - ], - javaPipelineOptions: [ - "--runner=PortableRunner", - "--jobEndpoint=localhost:${jobPort}", - "--environmentCacheMillis=10000", - "--experiments=beam_fn_api", - ], -) \ No newline at end of file diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 80e0bf052e57..3d9bcdb55c20 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +evaluationDependsOn(':sdks:python:test-suites:xlang') def pythonVersionSuffix = project.ext.pythonVersion.replace('.', '') def pythonContainerVersion = project.ext.pythonVersion @@ -317,3 +318,27 @@ project.tasks.register("inferencePostCommitIT") { 'tensorflowInferenceTest' ] } + +// Create cross-language tasks for running tests against Java expansion service(s) +def gcpProject = project.findProperty('dataflowProject') ?: 'apache-beam-testing' + +project(":sdks:python:test-suites:xlang").ext.xlangTasks.each { taskMetadata -> + createCrossLanguageUsingJavaExpansionTask( + name: taskMetadata.name, + expansionProjectPath: taskMetadata.expansionProjectPath, + collectMarker: taskMetadata.collectMarker, + startJobServer: taskMetadata.startJobServer, + cleanupJobServer: taskMetadata.cleanupJobServer, + numParallelTests: 1, + pythonPipelineOptions: [ + "--runner=TestDirectRunner", + "--project=${gcpProject}", + ], + pytestOptions: [ + "--capture=no", // print stdout instantly + "--timeout=4500", // timeout of whole command execution + "--color=yes", // console color + "--log-cli-level=INFO" //log level info + ] + ) +} diff --git a/sdks/python/test-suites/direct/py310/build.gradle b/sdks/python/test-suites/direct/py310/build.gradle index e929ea033508..d1727740d060 100644 --- a/sdks/python/test-suites/direct/py310/build.gradle +++ b/sdks/python/test-suites/direct/py310/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. 
pythonVersion = '3.10' apply from: '../common.gradle' -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/direct/py37/build.gradle b/sdks/python/test-suites/direct/py37/build.gradle index 2efcf718cb88..bf99f72d429c 100644 --- a/sdks/python/test-suites/direct/py37/build.gradle +++ b/sdks/python/test-suites/direct/py37/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. pythonVersion = '3.7' apply from: '../common.gradle' -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/direct/py38/build.gradle b/sdks/python/test-suites/direct/py38/build.gradle index 0b7afa459763..edf86a7bf5a8 100644 --- a/sdks/python/test-suites/direct/py38/build.gradle +++ b/sdks/python/test-suites/direct/py38/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. pythonVersion = '3.8' apply from: '../common.gradle' -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/direct/py39/build.gradle b/sdks/python/test-suites/direct/py39/build.gradle index 4cbcb9bb8c61..ae3c61978f61 100644 --- a/sdks/python/test-suites/direct/py39/build.gradle +++ b/sdks/python/test-suites/direct/py39/build.gradle @@ -22,4 +22,3 @@ applyPythonNature() // Required to setup a Python 3 virtualenv and task names. pythonVersion = '3.9' apply from: '../common.gradle' -apply from: "../xlang/common.gradle" diff --git a/sdks/python/test-suites/direct/xlang/common.gradle b/sdks/python/test-suites/xlang/build.gradle similarity index 61% rename from sdks/python/test-suites/direct/xlang/common.gradle rename to sdks/python/test-suites/xlang/build.gradle index 6a09c4da0411..81f18798ec0c 100644 --- a/sdks/python/test-suites/direct/xlang/common.gradle +++ b/sdks/python/test-suites/xlang/build.gradle @@ -15,17 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - - +// This is a base file to set up cross language tests for different runners import org.apache.beam.gradle.BeamModulePlugin - +import static org.apache.beam.gradle.BeamModulePlugin.CrossLanguageTaskCommon project.evaluationDependsOn(":sdks:python") -project.evaluationDependsOn(":runners:direct-java") -project.evaluationDependsOn(":sdks:java:io:google-cloud-platform:expansion-service") // Set up cross language tests def envDir = project.project(":sdks:python").envdir -def crossLanguageTestClasspath = project.project(":runners:direct-java").sourceSets.test.runtimeClasspath def jobPort = BeamModulePlugin.getRandomPort() def tmpDir = System.getenv("TMPDIR") ?: System.getenv("WORKSPACE") ?: "/tmp" def pidFile = "${tmpDir}/local_job_service_main-${jobPort}.pid" @@ -42,27 +38,22 @@ def cleanupTask = project.tasks.register("fnApiJobServerCleanup", Exec) { args '-c', ". ${envDir}/bin/activate && python -m apache_beam.runners.portability.local_job_service_main --pid_file ${pidFile} --stop" } -// Create tasks for running tests against GCP expansion service -def gcpExpansionJar = project.project(':sdks:java:io:google-cloud-platform:expansion-service').buildTestExpansionServiceJar.archivePath +// List of objects representing task metadata to create cross-language tasks from. +// Each object contains the minimum relevant metadata. +def xlangTasks = [] + +// ******** Java GCP expansion service ******** +def gcpExpansionProject = project.project(':sdks:java:io:google-cloud-platform:expansion-service') +// Properties that are common across runners. 
+// Used to launch the expansion service, collect the right tests, and cleanup afterwards +def gcpXlangCommon = new CrossLanguageTaskCommon().tap { + name = "gcpCrossLanguage" + expansionProjectPath = gcpExpansionProject.getPath() + collectMarker = "uses_gcp_java_expansion_service" + startJobServer = setupTask + cleanupJobServer = cleanupTask +} +xlangTasks.add(gcpXlangCommon) + -createCrossLanguageValidatesRunnerTask( - name: "gcpCrossLanguage", - expansionJar: gcpExpansionJar, - collectTestsWithDecorator: "uses_gcp_java_expansion_service", - startJobServer: setupTask, - cleanupJobServer: cleanupTask, - numParallelTests: 1, - classpath: crossLanguageTestClasspath, - pythonPipelineOptions: [ - "--runner=TestDirectRunner", - "--job_endpoint=localhost:${jobPort}", - "--environment_cache_millis=10000", - "--experiments=beam_fn_api", - ], - javaPipelineOptions: [ - "--runner=PortableRunner", - "--jobEndpoint=localhost:${jobPort}", - "--environmentCacheMillis=10000", - "--experiments=beam_fn_api", - ], -) \ No newline at end of file +ext.xlangTasks = xlangTasks diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 5b7e10bf12ab..ca0db7ed2d7d 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -32,7 +32,7 @@ passenv=TERM # Set [] options for pip installation of apache-beam tarball. extras = test,dataframe # Don't warn that these commands aren't installed. -whitelist_externals = +allowlist_externals = false time bash @@ -150,11 +150,6 @@ deps = sphinx_rtd_theme==0.4.3 docutils<0.18 Jinja2==3.0.3 # TODO(https://github.com/apache/beam/issues/21587): Sphinx version is too old. - torch - onnxruntime - onnx - tensorflow - tensorflow_hub commands = time {toxinidir}/scripts/generate_pydoc.sh @@ -166,7 +161,7 @@ deps = holdup==1.8.0 extras = gcp -whitelist_externals = +allowlist_externals = echo sleep passenv = HDFSCLI_CONFIG @@ -195,7 +190,7 @@ deps = -r build-requirements.txt extras = azure -whitelist_externals = +allowlist_externals = echo sleep passenv = REQUESTS_CA_BUNDLE @@ -343,8 +338,7 @@ commands = /bin/sh -c "pip freeze | grep -E onnx" # Run all ONNX unit tests pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} - -[testenv:py{37,38,39,310}-tf-{211}] + [testenv:py{37,38,39,310}-tensorflow-{29,210,211}] deps = -r build-requirements.txt diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index 1cdc3a1acea1..738f6bc3d434 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -1,6 +1,6 @@ { "name": "apache-beam", - "version": "2.45.0-SNAPSHOT", + "version": "2.47.0-SNAPSHOT", "devDependencies": { "@google-cloud/bigquery": "^5.12.0", "@types/mocha": "^9.0.0", diff --git a/settings.gradle.kts b/settings.gradle.kts index 3c19893774ec..4ae5fb308684 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -98,6 +98,10 @@ include(":runners:flink:1.14:job-server-container") include(":runners:flink:1.15") include(":runners:flink:1.15:job-server") include(":runners:flink:1.15:job-server-container") +// Flink 1.16 +include(":runners:flink:1.16") +include(":runners:flink:1.16:job-server") +include(":runners:flink:1.16:job-server-container") /* End Flink Runner related settings */ include(":runners:twister2") include(":runners:google-cloud-dataflow-java") @@ -254,6 +258,7 @@ include(":sdks:python:test-suites:tox:py37") include(":sdks:python:test-suites:tox:py38") include(":sdks:python:test-suites:tox:py39") include(":sdks:python:test-suites:tox:py310") +include(":sdks:python:test-suites:xlang") 
include(":sdks:typescript") include(":sdks:typescript:container") include(":vendor:bytebuddy-1_12_8") diff --git a/website/www/site/content/en/documentation/ml/model-evaluation.md b/website/www/site/content/en/documentation/ml/model-evaluation.md new file mode 100755 index 000000000000..a0f05a75f282 --- /dev/null +++ b/website/www/site/content/en/documentation/ml/model-evaluation.md @@ -0,0 +1,85 @@ +--- +title: "ML Model Evaluation" +--- + + +# ML Model Evaluation + +Model evaluation is an essential part of your ML journey. It allows you to benchmark your model’s performance against an unseen dataset. You can extract chosen metrics, create visualizations, log metadata, and compare the performance of different models. In your MLOps ecosystem, a model evaluation step is crucial for monitoring the evolution of your model or multiple models when your dataset grows or changes over time and when you retrain your model. + +Beam provides support for running model evaluation on a TensorFlow model directly inside your pipeline by using a PTransform called [ExtractEvaluateAndWriteResults](https://www.tensorflow.org/tfx/model_analysis/api_docs/python/tfma/ExtractEvaluateAndWriteResults). This PTransform is part of [TensorFlow Model Analysis (TFMA)](https://www.tensorflow.org/tfx/guide/tfma), a library for performing model evaluation across different slices of data. TFMA performs its computations in a distributed manner over large amounts of data using Beam, allowing you to evaluate models on large amounts of data in a distributed manner. These metrics are compared over slices of data and visualized in Jupyter or Colab notebooks. + +## TFMA Example + +Here is an example of how you can use ExtractEvaluateAndWriteResults to evaluate a linear regression model. + +First, define the configuration to specify the model information, the chosen metrics, and optionally the data slices. + +```python +from google.protobuf import text_format + +# Define the TFMA evaluation configuration +eval_config = text_format.Parse(""" + +## Model information + + model_specs { + # For keras and serving models, you need to add a `label_key`. + label_key: "output" + } + +## This post-training metric information is merged with any built-in + +## metrics from training + + metrics_specs { + metrics { class_name: "ExampleCount" } + metrics { class_name: "MeanAbsoluteError" } + metrics { class_name: "MeanSquaredError" } + metrics { class_name: "MeanPrediction" } + } + + slicing_specs {} +""", tfma.EvalConfig()) +``` + +Then, create a pipeline to run the evaluation: + +```python +from tfx_bsl.public import tfxio + +eval_shared_model = tfma.default_eval_shared_model( + eval_saved_model_path='model_path', eval_config=eval_config) + +tfx_io = tfxio.TFExampleRecord( + file_pattern='tfrecords_path', + raw_record_column_name=tfma.ARROW_INPUT_COLUMN) + +# Run evaluation +with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadData' >> tfx_io.BeamSource() + | 'EvalModel' >> tfma.ExtractEvaluateAndWriteResults( + eval_shared_model=eval_shared_model, + eval_config=eval_config, + output_path='output_path')) +``` + +This pipeline saves the results, including the config file, metrics, plots, and so on, to a chosen output_path. + +## TFMA End-to-end Example + +For a full end-to-end example of model evaluation in TFMA on Beam, see the [tfma_beam notebook](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/tfma_beam.ipynb). 
+ +This example shows the creation of tfrecords from an open source dataset, the training of a model, and the evaluation in Beam. diff --git a/website/www/site/content/en/documentation/ml/overview.md b/website/www/site/content/en/documentation/ml/overview.md index dabe7e9629fe..feec2c4e807c 100644 --- a/website/www/site/content/en/documentation/ml/overview.md +++ b/website/www/site/content/en/documentation/ml/overview.md @@ -36,12 +36,12 @@ Let’s take a look at the different building blocks that we need to create an e 2. **Data validation**: After you receieve your data, check the quality of your data. For example, you might want to detect outliers and calculate standard deviations and class distributions. 3. **Data preprocessing**: After you validate your data, transform the data so that it is ready to use to train your model. 4. Model training: When your data is ready, you can start training your AI/ML model. This step is typically repeated multiple times, depending on the quality of your trained model. -5. Model validation: Before you deploy your new model, validate its performance and accuracy. +5. **Model validation**: Before you deploy your new model, validate its performance and accuracy. 6. **Model deployment**: Deploy your model, using it to run inference on new or existing data. To keep your model up to date and performing well as your data grows and evolves, run these steps multiple times. In addition, you can apply MLOps to your project to automate the AI/ML workflows throughout the model and data lifecycle. Use orchestrators to automate this flow and to handle the transition between the different building blocks in your project. -You can use Apache Beam for data validation, data preprocessing, and model deployment/inference. The next section examines these building blocks in more detail and explores how they can be orchestrated. +You can use Apache Beam for data validation, data preprocessing, model validation, and model deployment/inference. The next section examines these building blocks in more detail and explores how they can be orchestrated. ## Data processing @@ -62,10 +62,12 @@ Beam provides different ways to implement inference as part of your pipeline. Yo The recommended way to implement inference is by using the [RunInference API](/documentation/sdks/python-machine-learning/). RunInference takes advantage of existing Apache Beam concepts, such as the `BatchElements` transform and the `Shared` class, to enable you to use models in your pipelines to create transforms optimized for machine learning inferences. The ability to create arbitrarily complex workflow graphs also allows you to build multi-model pipelines. -You can integrate your model in your pipeline by using the corresponding model handlers. A `ModelHandler` is an object that wraps the underlying model and allows you to configure its parameters. Model handlers are available for PyTorch, scikit-learn, and TensorFlow. Examples of how to use RunInference for PyTorch, scikit-learn, and TensorFlow are shown in this [notebook](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb). +You can integrate your model in your pipeline by using the corresponding model handlers. A `ModelHandler` is an object that wraps the underlying model and allows you to configure its parameters. Model handlers are available for PyTorch, scikit-learn, and TensorFlow. 
Examples of how to use RunInference for PyTorch, scikit-learn, and TensorFlow are shown in the [RunInference notebook](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb). Because they can process multiple computations simultaneously, GPUs are optimized for training artificial intelligence and deep learning models. RunInference also allows you to use GPUs for significant inference speedup. An example of how to use RunInference with GPUs is demonstrated on the [RunInference metrics](/documentation/ml/runinference-metrics) page. +Another usecase of running machine learning models is to run them on hardware devices. [Nvidia TensorRT](https://developer.nvidia.com/tensorrt) is a machine learning framework used to run inference on Nvidia hardware. See [TensorRT Inference](/documentation/ml/tensorrt-runinference) for an example of a pipeline that uses TensorRT and Beam with the RunInference transform and a BERT-based text classification model. + ### Custom Inference The RunInference API doesn't currently support making remote inference calls using, for example, the Natural Language API or the Cloud Vision API. Therefore, in order to use these remote APIs with Apache Beam, you need to write custom inference calls. The [Remote inference in Apache Beam notebook](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/custom_remote_inference.ipynb) shows how to implement a custom remote inference call using `beam.DoFn`. When you implement a remote inference for real life projects, consider the following factors: @@ -78,6 +80,12 @@ The RunInference API doesn't currently support making remote inference calls usi * Consider monitoring and measuring the performance of a pipeline when deploying, because monitoring can provide insight into the status and health of the application. +## Model validation + +Model validation allows you to benchmark your model’s performance against a previously unseen dataset. You can extract chosen metrics, create visualizations, log metadata, and compare the performance of different models with the end goal of validating whether your model is ready to deploy. Beam provides support for running model evaluation on a TensorFlow model directly inside your pipeline. + +Further reading: +* [ML model evaluation](/documentation/ml/model-evaluation): Illustrates how to integrate model evaluation as part of your pipeline by using [TensorFlow Model Analysis (TFMA)](https://www.tensorflow.org/tfx/guide/tfma). ## Orchestrators @@ -85,13 +93,15 @@ In order to automate and track the AI/ML workflows throughout your project, you When you use Apache Beam as one of the building blocks in your project, these orchestrators are able to launch your Apache Beam job and to keep track of the input and output of your pipeline. These tasks are essential when moving your AI/ML solution into production, because they allow you to handle your model and data over time and improve the quality and reproducibility of results. +Further reading: +* [ML Workflow Orchestration](/documentation/ml/orchestration): Illustrates how to orchestrate ML workflows consisting of multiple steps by using Kubeflow Pipelines and Tensorflow Extended. + ## Examples You can find examples of end-to-end AI/ML pipelines for several use cases: -* [ML Workflow Orchestration](/documentation/ml/orchestration): Illustrates how to orchestrate ML workflows consisting of multiple steps by using Kubeflow Pipelines and Tensorflow Extended. 
+ * [Multi model pipelines in Beam](/documentation/ml/multi-model-pipelines): Explains how multi-model pipelines work and gives an overview of what you need to know to build one using the RunInference API. * [Online Clustering in Beam](/documentation/ml/online-clustering): Demonstrates how to set up a real-time clustering pipeline that can read text from Pub/Sub, convert the text into an embedding using a transformer-based language model with the RunInference API, and cluster the text using BIRCH with stateful processing. * [Anomaly Detection in Beam](/documentation/ml/anomaly-detection): Demonstrates how to set up an anomaly detection pipeline that reads text from Pub/Sub in real time and then detects anomalies using a trained HDBSCAN clustering model with the RunInference API. * [Large Language Model Inference in Beam](/documentation/ml/large-language-modeling): Demonstrates a pipeline that uses RunInference to perform translation with the T5 language model which contains 11 billion parameters. * [Per Entity Training in Beam](/documentation/ml/per-entity-training): Demonstrates a pipeline that trains a Decision Tree Classifier per education level for predicting if the salary of a person is >= 50k. -* [TensorRT Inference](/documentation/ml/tensorrt-runinference): Demonstrates a pipeline that uses TensorRT with the RunInference transform and a BERT-based text classification model. diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index 08e47637634c..eeda63211b6e 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -39,7 +39,7 @@ The Python SDK supports Python 3.7, 3.8, 3.9, and 3.10. {{< /paragraph >}} {{< paragraph class="language-go">}} -The Go SDK supports Go v1.18+. SDK release 2.32.0 is the last experimental version. +The Go SDK supports Go v1.19+. SDK release 2.32.0 is the last experimental version. {{< /paragraph >}} {{< paragraph class="language-typescript">}} diff --git a/website/www/site/content/en/get-started/from-spark.md b/website/www/site/content/en/get-started/from-spark.md index 26a615304b3c..36d1fb2a0450 100644 --- a/website/www/site/content/en/get-started/from-spark.md +++ b/website/www/site/content/en/get-started/from-spark.md @@ -312,11 +312,12 @@ with beam.Pipeline() as pipeline: min_value = values | beam.CombineGlobally(min) max_value = values | beam.CombineGlobally(max) - # To access `total`, we need to pass it as a side input. + # To access `min_value` and `max_value`, we need to pass them as a side input. 
scaled_values = values | beam.Map( - lambda x, min_value, max_value: x / lambda x: (x - min_value) / (max_value - min_value), - min_value =beam.pvalue.AsSingleton(min_value), - max_value =beam.pvalue.AsSingleton(max_value)) + lambda x, minimum, maximum: (x - minimum) / (maximum - minimum), + minimum=beam.pvalue.AsSingleton(min_value), + maximum=beam.pvalue.AsSingleton(max_value), + ) scaled_values | beam.Map(print) {{< /highlight >}} diff --git a/website/www/site/content/en/get-started/quickstart-go.md b/website/www/site/content/en/get-started/quickstart-go.md index 6c82f10e30fa..c2504205aa6c 100644 --- a/website/www/site/content/en/get-started/quickstart-go.md +++ b/website/www/site/content/en/get-started/quickstart-go.md @@ -25,7 +25,7 @@ If you're interested in contributing to the Apache Beam Go codebase, see the [Co ## Set up your environment -The Beam SDK for Go requires `go` version 1.18 or newer. It can be downloaded [here](https://golang.org/). Check that you have version 1.18 by running: +The Beam SDK for Go requires `go` version 1.19 or newer. It can be downloaded [here](https://golang.org/). Check that you have version 1.19 by running: {{< highlight >}} $ go version diff --git a/website/www/site/layouts/case-studies/list.html b/website/www/site/layouts/case-studies/list.html index c609d56f468f..1021cf13912f 100644 --- a/website/www/site/layouts/case-studies/list.html +++ b/website/www/site/layouts/case-studies/list.html @@ -70,7 +70,7 @@

[case-studies/list.html hunk: the surrounding HTML markup was lost in extraction; the change rewraps the "Also used by" heading, while the `{{ .Params.cardTitle }}` card title, the "Share your story" link, and the `{{ range where $pages "Params.category" "ne" "study" }} {{ if .Params.hasLink }}` loop remain unchanged context.]
diff --git a/website/www/site/layouts/partials/section-menu/en/documentation.html b/website/www/site/layouts/partials/section-menu/en/documentation.html
old mode 100644
new mode 100755
index f0046693cfc0..61d7aa9fe355
--- a/website/www/site/layouts/partials/section-menu/en/documentation.html
+++ b/website/www/site/layouts/partials/section-menu/en/documentation.html
@@ -217,15 +217,16 @@
[section-menu/en/documentation.html hunk: the HTML markup was lost in extraction; only a stray list bullet survives.]
diff --git a/website/www/site/static/images/ml-workflows.svg b/website/www/site/static/images/ml-workflows.svg
index 2a9cb3c1f27a..90130a40672b 100755
--- a/website/www/site/static/images/ml-workflows.svg
+++ b/website/www/site/static/images/ml-workflows.svg
@@ -14,4 +14,4 @@
 limitations under the License.
 -->
[ml-workflows.svg hunk: the single-line SVG markup was lost in extraction. The removed and added versions carry the same visible labels — Data Ingestion, Data Validation, Data Preprocessing, Model Training, Model Validation, Model Deployment, New data, Model iterations, Workflows in Beam, Workflows outside of Beam — so the change is confined to the underlying markup.]