From 2edfb703de840e8e23a309d41d9d29bbc933170d Mon Sep 17 00:00:00 2001 From: evantahler Date: Fri, 28 Oct 2022 13:41:17 -0700 Subject: [PATCH 01/21] `AirbyteEstimateTraceMessage` --- .../airbyte_cdk/models/airbyte_protocol.py | 26 ++++++++++++++++ .../internal/AirbyteMessageTracker.java | 5 +++ .../airbyte_protocol/airbyte_protocol.yaml | 31 ++++++++++++++++++- .../understanding-airbyte/airbyte-protocol.md | 1 + 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index a4b654310d00..9ab3ba3016cf 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -98,6 +98,28 @@ class Config: failure_type: Optional[FailureType] = Field(None, description="The type of error") +class Type1(Enum): + stream = "stream" + sync = "sync" + + +class AirbyteEstimateTraceMessage(BaseModel): + class Config: + extra = Extra.allow + + name: str = Field(..., description="The name of the stream") + type: Type1 = Field(..., description="The type of estimate") + namespace: Optional[str] = Field(None, description="The namespace of the stream") + row_estimate: Optional[float] = Field( + None, + description="The estimated number of rows to be emitted by this sync for this stream", + ) + byte_estimate: Optional[float] = Field( + None, + description="The estimated number of bytes to be emitted by this sync for this stream", + ) + + class OrchestratorType(Enum): CONNECTOR_CONFIG = "CONNECTOR_CONFIG" @@ -213,6 +235,10 @@ class Config: type: TraceType = Field(..., description="the type of trace message", title="trace type") emitted_at: float = Field(..., description="the time in ms that the message was emitted") error: Optional[AirbyteErrorTraceMessage] = Field(None, description="error trace message: the error object") + estimate: Optional[AirbyteEstimateTraceMessage] = Field( + None, + description="Estimate trace message: a guess at how much data will be produced in this sync", + ) class AirbyteControlMessage(BaseModel): diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java index aa4b348887ae..3c6341501740 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java @@ -251,6 +251,7 @@ private void handleEmittedOrchestratorConnectorConfig(final AirbyteControlConnec private void handleEmittedTrace(final AirbyteTraceMessage traceMessage, final ConnectorType connectorType) { switch (traceMessage.getType()) { case ERROR -> handleEmittedErrorTrace(traceMessage, connectorType); + case ESTIMATE -> handleEmittedEstimateTrace(traceMessage, connectorType); default -> log.warn("Invalid message type for trace message: {}", traceMessage); } } @@ -263,6 +264,10 @@ private void handleEmittedErrorTrace(final AirbyteTraceMessage errorTraceMessage } } + private void handleEmittedEstimateTrace(final AirbyteTraceMessage estimateTraceMessage, final ConnectorType connectorType) { + // TODO! 
+ } + private short getStreamIndex(final String streamName) { if (!streamNameToIndex.containsKey(streamName)) { streamNameToIndex.put(streamName, nextStreamIndex); diff --git a/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml b/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml index 9965bde95825..1e60d1058cd3 100644 --- a/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml +++ b/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml @@ -4,7 +4,7 @@ title: AirbyteProtocol type: object description: AirbyteProtocol structs -version: 0.3.1 +version: 0.3.2 properties: airbyte_message: "$ref": "#/definitions/AirbyteMessage" @@ -174,12 +174,16 @@ definitions: type: string enum: - ERROR + - ESTIMATE emitted_at: description: "the time in ms that the message was emitted" type: number error: description: "error trace message: the error object" "$ref": "#/definitions/AirbyteErrorTraceMessage" + estimate: + description: "Estimate trace message: a guess at how much data will be produced in this sync" + "$ref": "#/definitions/AirbyteEstimateTraceMessage" AirbyteErrorTraceMessage: type: object additionalProperties: true @@ -201,6 +205,31 @@ definitions: enum: - system_error - config_error + AirbyteEstimateTraceMessage: + type: object + additionalProperties: true + required: + - name + - type + properties: + name: + description: The name of the stream + type: string + type: + description: The type of estimate + type: string + enum: + - stream + - sync + namespace: + description: The namespace of the stream + type: string + row_estimate: + description: The estimated number of rows to be emitted by this sync for this stream + type: number + byte_estimate: + description: The estimated number of bytes to be emitted by this sync for this stream + type: number AirbyteControlMessage: type: object additionalProperties: true diff --git a/docs/understanding-airbyte/airbyte-protocol.md b/docs/understanding-airbyte/airbyte-protocol.md index 381e03f05aa1..c34dbf726442 100644 --- a/docs/understanding-airbyte/airbyte-protocol.md +++ b/docs/understanding-airbyte/airbyte-protocol.md @@ -28,6 +28,7 @@ The Airbyte Protocol is versioned independently of the Airbyte Platform, and the | Version | Date of Change | Pull Request(s) | Subject | | :------- | :------------- | :------------------------------------------------------------------------------------------------------------------ | :------------------------------------------------------------------------------- | +| `v0.3.2` | 2022-10-128 | [xxx](https://github.com/airbytehq/airbyte/pull/xxx) | `AirbyteEstimateTraceMessage` added | | `v0.3.1` | 2022-10-12 | [17907](https://github.com/airbytehq/airbyte/pull/17907) | `AirbyteControlMessage.ConnectorConfig` added | | `v0.3.0` | 2022-09-09 | [16479](https://github.com/airbytehq/airbyte/pull/16479) | `AirbyteLogMessage.stack_trace` added | | `v0.2.0` | 2022-06-10 | [13573](https://github.com/airbytehq/airbyte/pull/13573) & [12586](https://github.com/airbytehq/airbyte/pull/12586) | `STREAM` and `GLOBAL` STATE messages | From 970c7995191581a1599ae5779a5bd1efdab86f3a Mon Sep 17 00:00:00 2001 From: evantahler Date: Fri, 28 Oct 2022 13:44:29 -0700 Subject: [PATCH 02/21] upper-case enum options --- airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py | 5 +++-- .../io/airbyte/workers/internal/AirbyteMessageTracker.java | 1 + 
.../main/resources/airbyte_protocol/airbyte_protocol.yaml | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index 9ab3ba3016cf..7560d968d073 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -81,6 +81,7 @@ class Config: class TraceType(Enum): ERROR = "ERROR" + ESTIMATE = "ESTIMATE" class FailureType(Enum): @@ -99,8 +100,8 @@ class Config: class Type1(Enum): - stream = "stream" - sync = "sync" + STREAM = "STREAM" + SYNC = "SYNC" class AirbyteEstimateTraceMessage(BaseModel): diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java index 3c6341501740..96582131459c 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java @@ -264,6 +264,7 @@ private void handleEmittedErrorTrace(final AirbyteTraceMessage errorTraceMessage } } + @SuppressWarnings("PMD") // until method is implemented private void handleEmittedEstimateTrace(final AirbyteTraceMessage estimateTraceMessage, final ConnectorType connectorType) { // TODO! } diff --git a/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml b/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml index 1e60d1058cd3..22eaf29ced57 100644 --- a/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml +++ b/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml @@ -219,8 +219,8 @@ definitions: description: The type of estimate type: string enum: - - stream - - sync + - STREAM + - SYNC namespace: description: The namespace of the stream type: string From 6cf24c510d010216b15ffadde6604caf77cd77b6 Mon Sep 17 00:00:00 2001 From: evantahler Date: Fri, 28 Oct 2022 15:05:53 -0700 Subject: [PATCH 03/21] Faker emits TraceEstimateMessages --- .../connectors/source-faker/setup.py | 2 +- .../source-faker/source_faker/source.py | 35 +++++++++++- .../source-faker/unit_tests/unit_test.py | 54 ++++++++++++++++--- 3 files changed, 80 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/connectors/source-faker/setup.py b/airbyte-integrations/connectors/source-faker/setup.py index ab62499037f5..ca2136a928a5 100644 --- a/airbyte-integrations/connectors/source-faker/setup.py +++ b/airbyte-integrations/connectors/source-faker/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1", "mimesis==6.1.1"] +MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2", "mimesis==6.1.1"] TEST_REQUIREMENTS = [ "pytest~=6.1", diff --git a/airbyte-integrations/connectors/source-faker/source_faker/source.py b/airbyte-integrations/connectors/source-faker/source_faker/source.py index 6e664751df24..bcde2a6b3b20 100644 --- a/airbyte-integrations/connectors/source-faker/source_faker/source.py +++ b/airbyte-integrations/connectors/source-faker/source_faker/source.py @@ -41,8 +41,11 @@ def check(self, logger: AirbyteLogger, config: Dict[str, any]) -> AirbyteConnect :return: AirbyteConnectionStatus indicating a Success or Failure """ - # As this is an in-memory source, it always succeeds - return 
AirbyteConnectionStatus(status=Status.SUCCEEDED) + if type(config["count"]) == int or type(config["count"]) == float: + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + else: + return AirbyteConnectionStatus(status=Status.FAILED) + def discover(self, logger: AirbyteLogger, config: Dict[str, any]) -> AirbyteCatalog: """ @@ -136,6 +139,10 @@ def read( records_in_sync = 0 records_in_page = 0 + users_estimate = count - cursor + yield generate_estimate(stream.stream.name, users_estimate) + yield generate_estimate("Purchases", users_estimate * 2) # a fuzzy guess, some users have purchases, some don't + for i in range(cursor, count): user = generate_user(person, dt, i) yield generate_record(stream, user) @@ -162,6 +169,7 @@ def read( elif stream.stream.name == "Products": products = generate_products() + yield generate_estimate(stream.stream.name, len(products)) for p in products: yield generate_record(stream, p) yield generate_state(state, stream, {"product_count": len(products)}) @@ -193,6 +201,18 @@ def generate_record(stream: any, data: any): ) +def generate_estimate(stream_name: str, total: int): + # TODO: Use the updated CDK classes when published, e.g. `return AirbyteMessage`` + + data = { + "type": "TRACE", + "emitted_at": int(datetime.datetime.now().timestamp() * 1000), + "trace": {"type": "ESTIMATE", "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": total}}, + } + + return HackedAirbyteTraceMessage(data) + + def log_stream(stream_name: str): return AirbyteMessage( type=Type.LOG, @@ -300,3 +320,14 @@ def format_airbyte_time(d: datetime): s = s.replace(" ", "T") s += "+00:00" return s + + +class HackedAirbyteTraceMessage: + data = {} + type = "TRACE" + + def __init__(self, data: dict): + self.data = data + + def json(self, exclude_unset): + return json.dumps(self.data) diff --git a/airbyte-integrations/connectors/source-faker/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-faker/unit_tests/unit_test.py index 0db54325bffa..68a4351ba2b5 100644 --- a/airbyte-integrations/connectors/source-faker/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-faker/unit_tests/unit_test.py @@ -44,7 +44,13 @@ def test_read_small_random_data(): logger = None config = {"count": 10} catalog = ConfiguredAirbyteCatalog( - streams=[{"stream": {"name": "Users", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}] + streams=[ + { + "stream": {"name": "Users", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] ) state = {} iterator = source.read(logger, config, catalog, state) @@ -70,8 +76,16 @@ def test_read_big_random_data(): config = {"count": 1000, "records_per_slice": 100, "records_per_sync": 1000} catalog = ConfiguredAirbyteCatalog( streams=[ - {"stream": {"name": "Users", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}, - {"stream": {"name": "Products", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}, + { + "stream": {"name": "Users", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + }, + { + "stream": {"name": "Products", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + }, ] ) state = {} @@ -98,9 +112,21 @@ def test_with_purchases(): config = {"count": 1000, 
"records_per_sync": 1000} catalog = ConfiguredAirbyteCatalog( streams=[ - {"stream": {"name": "Users", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}, - {"stream": {"name": "Products", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}, - {"stream": {"name": "Purchases", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}, + { + "stream": {"name": "Users", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + }, + { + "stream": {"name": "Products", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + }, + { + "stream": {"name": "Purchases", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + }, ] ) state = {} @@ -128,7 +154,13 @@ def test_sync_ends_with_limit(): logger = None config = {"count": 100, "records_per_sync": 5} catalog = ConfiguredAirbyteCatalog( - streams=[{"stream": {"name": "Users", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}] + streams=[ + { + "stream": {"name": "Users", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] ) state = {} iterator = source.read(logger, config, catalog, state) @@ -157,7 +189,13 @@ def test_read_with_seed(): logger = None config = {"count": 1, "seed": 100} catalog = ConfiguredAirbyteCatalog( - streams=[{"stream": {"name": "Users", "json_schema": {}}, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite"}] + streams=[ + { + "stream": {"name": "Users", "json_schema": {}, "supported_sync_modes": ["full_refresh"]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] ) state = {} iterator = source.read(logger, config, catalog, state) From b759a72ff3de83dc2320151447af453bb528eaaf Mon Sep 17 00:00:00 2001 From: evantahler Date: Fri, 28 Oct 2022 15:16:33 -0700 Subject: [PATCH 04/21] move emitted_at --- .../connectors/source-faker/source_faker/source.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-faker/source_faker/source.py b/airbyte-integrations/connectors/source-faker/source_faker/source.py index bcde2a6b3b20..57cad86ff7de 100644 --- a/airbyte-integrations/connectors/source-faker/source_faker/source.py +++ b/airbyte-integrations/connectors/source-faker/source_faker/source.py @@ -46,7 +46,6 @@ def check(self, logger: AirbyteLogger, config: Dict[str, any]) -> AirbyteConnect else: return AirbyteConnectionStatus(status=Status.FAILED) - def discover(self, logger: AirbyteLogger, config: Dict[str, any]) -> AirbyteCatalog: """ Returns an AirbyteCatalog representing the available streams and fields in this integration. 
@@ -206,8 +205,11 @@ def generate_estimate(stream_name: str, total: int): data = { "type": "TRACE", - "emitted_at": int(datetime.datetime.now().timestamp() * 1000), - "trace": {"type": "ESTIMATE", "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": total}}, + "trace": { + "emitted_at": int(datetime.datetime.now().timestamp() * 1000), + "type": "ESTIMATE", + "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": total}, + }, } return HackedAirbyteTraceMessage(data) From 5f3466e1cd67beea1f6c40d70fcaa5ae497e14fc Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 28 Oct 2022 16:49:54 -0700 Subject: [PATCH 05/21] Mock out job history handler so we have running throughput. --- airbyte-api/src/main/openapi/config.yaml | 6 + .../server/handlers/JobHistoryHandler.java | 41 +++++ .../api/generated-api-html/index.html | 142 ++++++++++++++---- 3 files changed, 161 insertions(+), 28 deletions(-) diff --git a/airbyte-api/src/main/openapi/config.yaml b/airbyte-api/src/main/openapi/config.yaml index 5681bbdbcc70..56c36758e27c 100644 --- a/airbyte-api/src/main/openapi/config.yaml +++ b/airbyte-api/src/main/openapi/config.yaml @@ -4022,6 +4022,12 @@ components: recordsCommitted: type: integer format: int64 + estimatedRecords: + type: integer + format: int64 + estimatedBytes: + type: integer + format: int64 AttemptStreamStats: type: object required: diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index e25bee37f04b..e5406db51bef 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -5,7 +5,10 @@ package io.airbyte.server.handlers; import com.google.common.base.Preconditions; +import io.airbyte.api.model.generated.AttemptInfoRead; import io.airbyte.api.model.generated.AttemptNormalizationStatusReadList; +import io.airbyte.api.model.generated.AttemptStats; +import io.airbyte.api.model.generated.AttemptStreamStats; import io.airbyte.api.model.generated.ConnectionRead; import io.airbyte.api.model.generated.DestinationDefinitionIdRequestBody; import io.airbyte.api.model.generated.DestinationDefinitionRead; @@ -40,12 +43,14 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.Random; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; public class JobHistoryHandler { + private static final Random RANDOM = new Random(); private final ConnectionsHandler connectionsHandler; private final SourceHandler sourceHandler; private final DestinationHandler destinationHandler; @@ -122,6 +127,42 @@ public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) final Job job = jobPersistence.getJob(jobIdRequestBody.getId()); final JobInfoRead jobinfoRead = jobConverter.getJobInfoRead(job); + // Mock out this data for now. + final var totalRecords = 1000; + final var totalBytes = 100_000; + + for (final AttemptInfoRead attempt : jobinfoRead.getAttempts()) { + final var streamStats = attempt.getAttempt().getStreamStats(); + // if this doesn't exist, mock something. 
+ if (streamStats == null) { + final var stats = List.of(new AttemptStreamStats().streamName("foo stream"), new AttemptStreamStats().streamName("bar stream")); + attempt.getAttempt().streamStats(stats); + } + + for (final AttemptStreamStats stats : attempt.getAttempt().getStreamStats()) { + if (stats.getStats() == null) { + stats.stats(new AttemptStats()); + } + + final var s = stats.getStats(); + final var runningSync = s.getBytesEmitted() == null; + + // if the sync is not done, this is empty, so we mock it out now. + if (runningSync) { + s.bytesEmitted(RANDOM.nextLong(totalBytes)); + s.recordsEmitted(RANDOM.nextLong(totalRecords)); + + // Set estimate to a random buffer of the estimated to show a 'progress-like' bar. + s.estimatedBytes(s.getBytesEmitted() * RANDOM.nextLong(2, 5)); + s.estimatedRecords(s.getRecordsEmitted() * RANDOM.nextLong(2, 5)); + } else { + // if it's done, set to the correct number. + s.estimatedBytes(s.getBytesEmitted()); + s.estimatedRecords(s.getRecordsEmitted()); + } + } + } + return buildJobDebugInfoRead(jobinfoRead); } diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index 48dd1c63f7be..c17a2cc93107 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -1151,6 +1151,8 @@

 Example data
         "stateMessagesEmitted" : 7,
         "recordsCommitted" : 1,
         "bytesEmitted" : 4,
+        "estimatedBytes" : 1,
+        "estimatedRecords" : 1,
         "recordsEmitted" : 2
       },
[... remaining hunks in this auto-generated file omitted: every AttemptStats example throughout the document gains the same "estimatedBytes" and "estimatedRecords" fields, and regenerated example "timestamp" values change from 1 to 6 ...]
@@ -10275,6 +10359,8 @@
AttemptStats -
bytesEmitted (optional)
Long format: int64
stateMessagesEmitted (optional)
Long format: int64
recordsCommitted (optional)
Long format: int64
+
estimatedRecords (optional)
Long format: int64
+
estimatedBytes (optional)
From d0355ca5cf989a41e322d48e78dcb672b4c9b6b4 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 28 Oct 2022 17:34:13 -0700 Subject: [PATCH 06/21] Also mock the jobs/list api. --- .../server/handlers/JobHistoryHandler.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index e5406db51bef..9d51a200028e 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -7,6 +7,7 @@ import com.google.common.base.Preconditions; import io.airbyte.api.model.generated.AttemptInfoRead; import io.airbyte.api.model.generated.AttemptNormalizationStatusReadList; +import io.airbyte.api.model.generated.AttemptRead; import io.airbyte.api.model.generated.AttemptStats; import io.airbyte.api.model.generated.AttemptStreamStats; import io.airbyte.api.model.generated.ConnectionRead; @@ -109,6 +110,44 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept .map(JobConverter::getJobWithAttemptsRead) .collect(Collectors.toList()); + // Mock out this data for now. + final var totalRecords = 1000; + final var totalBytes = 100_000; + + for (JobWithAttemptsRead jwar: jobReads) { + for (final AttemptRead attempt : jwar.getAttempts()) { + final var streamStats = attempt.getStreamStats(); + // if this doesn't exist, mock something. + if (streamStats == null) { + final var stats = List.of(new AttemptStreamStats().streamName("foo stream"), new AttemptStreamStats().streamName("bar stream")); + attempt.streamStats(stats); + } + + for (final AttemptStreamStats stats : attempt.getStreamStats()) { + if (stats.getStats() == null) { + stats.stats(new AttemptStats()); + } + + final var s = stats.getStats(); + final var runningSync = s.getBytesEmitted() == null; + + // if the sync is not done, this is empty, so we mock it out now. + if (runningSync) { + s.bytesEmitted(RANDOM.nextLong(totalBytes)); + s.recordsEmitted(RANDOM.nextLong(totalRecords)); + + // Set estimate to a random buffer of the estimated to show a 'progress-like' bar. + s.estimatedBytes(s.getBytesEmitted() * RANDOM.nextLong(2, 5)); + s.estimatedRecords(s.getRecordsEmitted() * RANDOM.nextLong(2, 5)); + } else { + // if it's done, set to the correct number. 
+ s.estimatedBytes(s.getBytesEmitted()); + s.estimatedRecords(s.getRecordsEmitted()); + } + } + } + } + return new JobReadList().jobs(jobReads).totalJobCount(totalJobCount); } From 8c176f55ae91b62107d607eccc82ad15b72a4a3b Mon Sep 17 00:00:00 2001 From: evantahler Date: Fri, 28 Oct 2022 17:34:58 -0700 Subject: [PATCH 07/21] [wip] top-level progress bar --- airbyte-webapp/package-lock.json | 49 +++++++++++++++++++ airbyte-webapp/package.json | 1 + .../JobItem/components/MainInfo.tsx | 4 ++ .../components/ProgressBar/ProgressBar.tsx | 41 ++++++++++++++++ .../src/components/ProgressBar/index.tsx | 1 + 5 files changed, 96 insertions(+) create mode 100644 airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx create mode 100644 airbyte-webapp/src/components/ProgressBar/index.tsx diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index d6a677da4c03..59f32837ba95 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -31,6 +31,7 @@ "lodash": "^4.17.21", "mdast": "^3.0.0", "query-string": "^6.13.1", + "rc-progress": "^3.4.0", "react": "^17.0.2", "react-dom": "^17.0.2", "react-helmet-async": "^1.3.0", @@ -36065,6 +36066,34 @@ "rc": "cli.js" } }, + "node_modules/rc-progress": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/rc-progress/-/rc-progress-3.4.0.tgz", + "integrity": "sha512-ZuMyOzzTkZnn+EKqGQ7YHzrvGzBtcCCVjx1McC/E/pMTvr6GWVfVRSawDlWsscxsJs7MkqSTwCO6Lu4IeoY2zQ==", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.6", + "rc-util": "^5.16.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-util": { + "version": "5.24.4", + "resolved": "https://registry.npmjs.org/rc-util/-/rc-util-5.24.4.tgz", + "integrity": "sha512-2a4RQnycV9eV7lVZPEJ7QwJRPlZNc06J7CwcwZo4vIHr3PfUqtYgl1EkUV9ETAc6VRRi8XZOMFhYG63whlIC9Q==", + "dependencies": { + "@babel/runtime": "^7.18.3", + "react-is": "^16.12.0", + "shallowequal": "^1.1.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, "node_modules/rc/node_modules/strip-json-comments": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", @@ -75387,6 +75416,26 @@ } } }, + "rc-progress": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/rc-progress/-/rc-progress-3.4.0.tgz", + "integrity": "sha512-ZuMyOzzTkZnn+EKqGQ7YHzrvGzBtcCCVjx1McC/E/pMTvr6GWVfVRSawDlWsscxsJs7MkqSTwCO6Lu4IeoY2zQ==", + "requires": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.6", + "rc-util": "^5.16.1" + } + }, + "rc-util": { + "version": "5.24.4", + "resolved": "https://registry.npmjs.org/rc-util/-/rc-util-5.24.4.tgz", + "integrity": "sha512-2a4RQnycV9eV7lVZPEJ7QwJRPlZNc06J7CwcwZo4vIHr3PfUqtYgl1EkUV9ETAc6VRRi8XZOMFhYG63whlIC9Q==", + "requires": { + "@babel/runtime": "^7.18.3", + "react-is": "^16.12.0", + "shallowequal": "^1.1.0" + } + }, "react": { "version": "17.0.2", "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 53e0150b8a8e..3ae3273048e2 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -47,6 +47,7 @@ "lodash": "^4.17.21", "mdast": "^3.0.0", "query-string": "^6.13.1", + "rc-progress": "^3.4.0", "react": "^17.0.2", "react-dom": "^17.0.2", "react-helmet-async": "^1.3.0", diff --git a/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx 
b/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx index c79a411bc233..ccd2550f94e0 100644 --- a/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx +++ b/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx @@ -5,6 +5,7 @@ import React, { useMemo } from "react"; import { FormattedDateParts, FormattedMessage, FormattedTimeParts } from "react-intl"; import { StatusIcon } from "components"; +import { ProgressBar } from "components/ProgressBar"; import { Cell, Row } from "components/SimpleTableComponents"; import { AttemptRead, JobStatus, SynchronousJobRead } from "core/request/AirbyteClient"; @@ -87,6 +88,9 @@ const MainInfo: React.FC = ({ job, attempts = [], isOpen, onExpan
{statusIcon}
{label} +
+ +
{attempts.length > 0 && ( <> {jobConfigType === "reset_connection" ? ( diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx new file mode 100644 index 000000000000..f585c1f898a2 --- /dev/null +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -0,0 +1,41 @@ +import { Line } from "rc-progress"; + +import { getJobStatus } from "components/JobItem/JobItem"; + +import { SynchronousJobRead } from "core/request/AirbyteClient"; +import { JobsWithJobs } from "pages/ConnectionPage/pages/ConnectionItemPage/JobsList"; + +export var ProgressBar = ({ percent, job }: { percent?: number; job: JobsWithJobs | SynchronousJobRead }) => { + if (!percent) { + percent = Math.random() * 100; + } + const jobStatus = getJobStatus(job); + + console.log(job); + + // colors from `_colors.scss` TODO: Use the SCSS variables maybe? + let color = "white"; + switch (jobStatus) { + case "pending": + color = "#cbc8ff"; + break; + case "running": + color = "#cbc8ff"; + break; + case "incomplete": + color = "#fdf8e1"; + break; + case "failed": + color = "#e64228"; + break; + case "succeeded": + color = "#67dae1"; + percent = 100; // just to be safe + break; + case "cancelled": + percent = 0; // just to be safe + break; + } + + return ; +}; diff --git a/airbyte-webapp/src/components/ProgressBar/index.tsx b/airbyte-webapp/src/components/ProgressBar/index.tsx new file mode 100644 index 000000000000..a550d58d84d6 --- /dev/null +++ b/airbyte-webapp/src/components/ProgressBar/index.tsx @@ -0,0 +1 @@ +export * from "./ProgressBar"; From 5a1c0b7c8b1a2884c0a049a8dc6f580e5a96e778 Mon Sep 17 00:00:00 2001 From: evantahler Date: Mon, 31 Oct 2022 11:37:58 -0700 Subject: [PATCH 08/21] UI Basics --- .../ProgressBar/ProgressBar.module.scss | 8 ++ .../components/ProgressBar/ProgressBar.tsx | 81 ++++++++++++++++--- airbyte-webapp/src/locales/en.json | 2 + 3 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 airbyte-webapp/src/components/ProgressBar/ProgressBar.module.scss diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.module.scss b/airbyte-webapp/src/components/ProgressBar/ProgressBar.module.scss new file mode 100644 index 000000000000..b2db942f3fed --- /dev/null +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.module.scss @@ -0,0 +1,8 @@ +@use "../../scss/colors"; +@use "../../scss/variables"; + +.container { + font-size: 12px; + line-height: 15px; + color: colors.$grey; +} diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index f585c1f898a2..e574b49c6b27 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -1,17 +1,50 @@ +import classNames from "classnames"; import { Line } from "rc-progress"; +import { useIntl } from "react-intl"; import { getJobStatus } from "components/JobItem/JobItem"; -import { SynchronousJobRead } from "core/request/AirbyteClient"; +import { AttemptRead, SynchronousJobRead } from "core/request/AirbyteClient"; +import Status from "core/statuses"; import { JobsWithJobs } from "pages/ConnectionPage/pages/ConnectionItemPage/JobsList"; -export var ProgressBar = ({ percent, job }: { percent?: number; job: JobsWithJobs | SynchronousJobRead }) => { - if (!percent) { - percent = Math.random() * 100; - } +import styles from "./ProgressBar.module.scss"; + +function isJobsWithJobs(job: JobsWithJobs | SynchronousJobRead): job is 
JobsWithJobs { + return (job as JobsWithJobs).attempts !== undefined; +} + +export var ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) => { + const { formatMessage } = useIntl(); + + let numerator = 0; + let denominator = 0; + let totalPercent = -1; + let latestAttempt: AttemptRead | undefined; + const jobStatus = getJobStatus(job); - console.log(job); + if (isJobsWithJobs(job)) { + if (job.attempts) { + latestAttempt = job.attempts[job.attempts?.length - 1]; + if (latestAttempt && latestAttempt.totalStats) { + const totalStats = latestAttempt.totalStats; + if (totalStats?.recordsEmitted && totalStats?.estimatedRecords) { + numerator = totalStats.recordsEmitted; + denominator = totalStats.estimatedRecords; + } + } else if (latestAttempt && !latestAttempt.totalStats && latestAttempt.streamStats) { + for (const stream of latestAttempt.streamStats) { + numerator += stream.stats.recordsEmitted ?? 0; + denominator += stream.stats.estimatedRecords ?? 0; + } + } + } + } else { + // TODO... maybe + } + + totalPercent = (numerator / denominator) * 100; // colors from `_colors.scss` TODO: Use the SCSS variables maybe? let color = "white"; @@ -30,12 +63,42 @@ export var ProgressBar = ({ percent, job }: { percent?: number; job: JobsWithJob break; case "succeeded": color = "#67dae1"; - percent = 100; // just to be safe + totalPercent = 100; // just to be safe break; case "cancelled": - percent = 0; // just to be safe + totalPercent = 0; // just to be safe break; } - return ; + return ( +
+ + {latestAttempt?.status === Status.RUNNING && latestAttempt.streamStats && ( + <> +
+ {numerator} of {denominator} {formatMessage({ id: "estimate.syncedThusFar" })} +
+
+
+
Stream Stats:
+ {latestAttempt.streamStats?.map((stream) => { + const localNumerator = stream.stats.recordsEmitted; + const localDenominator = stream.stats.estimatedRecords; + + return ( +
+ {stream.streamName} -{" "} + {localNumerator && localDenominator + ? `${Math.round( + (localNumerator * 100) / localDenominator + )}% complete (${localNumerator} / ${localDenominator} records moved)` + : `${localNumerator} records moved so far`} +
+ ); + })} +
+ + )} +
+ ); }; diff --git a/airbyte-webapp/src/locales/en.json b/airbyte-webapp/src/locales/en.json index 8a5af3249a25..8b80b070301e 100644 --- a/airbyte-webapp/src/locales/en.json +++ b/airbyte-webapp/src/locales/en.json @@ -544,6 +544,8 @@ "errorView.unknown": "Unknown", "errorView.unknownError": "Unknown error occurred", + "estimate.syncedThusFar": "estimated records synced", + "frequency.manual": "Manual", "frequency.cron": "Cron", "frequency.minutes": "{value} min", From 848ee72850ba58fc56a03b8d6a8f4fe9b12f5565 Mon Sep 17 00:00:00 2001 From: evantahler Date: Mon, 31 Oct 2022 12:19:49 -0700 Subject: [PATCH 09/21] time estimates --- .../components/ProgressBar/ProgressBar.tsx | 66 +++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index e574b49c6b27..c4c2c3e2e40b 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -20,10 +20,33 @@ export var ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) = let numerator = 0; let denominator = 0; let totalPercent = -1; + let timeRemainingString = ""; let latestAttempt: AttemptRead | undefined; const jobStatus = getJobStatus(job); + // colors from `_colors.scss` TODO: Use the SCSS variables maybe? + let color = "white"; + switch (jobStatus) { + case "pending": + color = "#cbc8ff"; + break; + case "running": + color = "#cbc8ff"; + break; + case "incomplete": + color = "#fdf8e1"; + break; + case "failed": + color = "#e64228"; + return null; + case "succeeded": + color = "#67dae1"; + return null; + case "cancelled": + return null; + } + if (isJobsWithJobs(job)) { if (job.attempts) { latestAttempt = job.attempts[job.attempts?.length - 1]; @@ -44,30 +67,20 @@ export var ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) = // TODO... maybe } - totalPercent = (numerator / denominator) * 100; + totalPercent = Math.floor((numerator * 100) / denominator); - // colors from `_colors.scss` TODO: Use the SCSS variables maybe? - let color = "white"; - switch (jobStatus) { - case "pending": - color = "#cbc8ff"; - break; - case "running": - color = "#cbc8ff"; - break; - case "incomplete": - color = "#fdf8e1"; - break; - case "failed": - color = "#e64228"; - break; - case "succeeded": - color = "#67dae1"; - totalPercent = 100; // just to be safe - break; - case "cancelled": - totalPercent = 0; // just to be safe - break; + if (latestAttempt && latestAttempt.status === Status.RUNNING) { + const now = new Date().getTime(); + const elapsedTime = now - latestAttempt.createdAt * 1000; + const timeRemaining = Math.floor(elapsedTime / totalPercent) * (100 - totalPercent); // in ms + + const minutesRemaining = Math.ceil(timeRemaining / 1000 / 60); + const hoursRemaining = Math.ceil(minutesRemaining / 60); + if (minutesRemaining <= 60) { + timeRemainingString = `${minutesRemaining} minutes remaining`; + } else { + timeRemainingString = `${hoursRemaining} hours remaining`; + } } return ( @@ -76,17 +89,18 @@ export var ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) = {latestAttempt?.status === Status.RUNNING && latestAttempt.streamStats && ( <>
- {numerator} of {denominator} {formatMessage({ id: "estimate.syncedThusFar" })} + {numerator} of {denominator} {formatMessage({ id: "estimate.syncedThusFar" })}{" "} + {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""}

Stream Stats:
- {latestAttempt.streamStats?.map((stream) => { + {latestAttempt.streamStats?.map((stream, idx) => { const localNumerator = stream.stats.recordsEmitted; const localDenominator = stream.stats.estimatedRecords; return ( -
+
{stream.streamName} -{" "} {localNumerator && localDenominator ? `${Math.round( From d422544602e6c13658fe13f6bf06d541616e26ba Mon Sep 17 00:00:00 2001 From: evantahler Date: Mon, 31 Oct 2022 12:58:55 -0700 Subject: [PATCH 10/21] fix test --- airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index c4c2c3e2e40b..ad612de8faed 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -14,7 +14,7 @@ function isJobsWithJobs(job: JobsWithJobs | SynchronousJobRead): job is JobsWith return (job as JobsWithJobs).attempts !== undefined; } -export var ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) => { +export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) => { const { formatMessage } = useIntl(); let numerator = 0; From eb10300ab40e8722b9e3a3266239556107094759 Mon Sep 17 00:00:00 2001 From: evantahler Date: Mon, 31 Oct 2022 13:31:59 -0700 Subject: [PATCH 11/21] more localization --- .../components/ProgressBar/ProgressBar.tsx | 30 +++++++++---------- airbyte-webapp/src/locales/en.json | 6 +++- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index ad612de8faed..36b0c6eca641 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -21,6 +21,7 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) let denominator = 0; let totalPercent = -1; let timeRemainingString = ""; + const unEstimatedStreams: string[] = []; let latestAttempt: AttemptRead | undefined; const jobStatus = getJobStatus(job); @@ -50,14 +51,11 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) if (isJobsWithJobs(job)) { if (job.attempts) { latestAttempt = job.attempts[job.attempts?.length - 1]; - if (latestAttempt && latestAttempt.totalStats) { - const totalStats = latestAttempt.totalStats; - if (totalStats?.recordsEmitted && totalStats?.estimatedRecords) { - numerator = totalStats.recordsEmitted; - denominator = totalStats.estimatedRecords; - } - } else if (latestAttempt && !latestAttempt.totalStats && latestAttempt.streamStats) { + if (latestAttempt && !latestAttempt.totalStats && latestAttempt.streamStats) { for (const stream of latestAttempt.streamStats) { + if (!stream.stats.recordsEmitted) { + unEstimatedStreams.push(`${stream.streamName}`); + } numerator += stream.stats.recordsEmitted ?? 0; denominator += stream.stats.estimatedRecords ?? 
0; } @@ -73,13 +71,12 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) const now = new Date().getTime(); const elapsedTime = now - latestAttempt.createdAt * 1000; const timeRemaining = Math.floor(elapsedTime / totalPercent) * (100 - totalPercent); // in ms - const minutesRemaining = Math.ceil(timeRemaining / 1000 / 60); const hoursRemaining = Math.ceil(minutesRemaining / 60); if (minutesRemaining <= 60) { - timeRemainingString = `${minutesRemaining} minutes remaining`; + timeRemainingString = `${minutesRemaining} ${formatMessage({ id: "estimate.minutesRemaining" })}`; } else { - timeRemainingString = `${hoursRemaining} hours remaining`; + timeRemainingString = `${hoursRemaining} ${formatMessage({ id: "estimate.hoursRemaining" })}`; } } @@ -89,9 +86,10 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) {latestAttempt?.status === Status.RUNNING && latestAttempt.streamStats && ( <>
- {numerator} of {denominator} {formatMessage({ id: "estimate.syncedThusFar" })}{" "} + {numerator} / {denominator} {formatMessage({ id: "estimate.recordsSynced" })}{" "} {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""}
+ {unEstimatedStreams.length > 0 &&
{unEstimatedStreams.length} un-estimated streams
}

Stream Stats:
@@ -103,10 +101,12 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead })
{stream.streamName} -{" "} {localNumerator && localDenominator - ? `${Math.round( - (localNumerator * 100) / localDenominator - )}% complete (${localNumerator} / ${localDenominator} records moved)` - : `${localNumerator} records moved so far`} + ? `${Math.round((localNumerator * 100) / localDenominator)}${formatMessage({ + id: "estimate.percentComplete", + })} (${localNumerator} / ${localDenominator} ${formatMessage({ + id: "estimate.recordsSynced", + })})` + : `${localNumerator} ${formatMessage({ id: "estimate.recordsSyncedThusFar" })} (no estimate)`}
); })} diff --git a/airbyte-webapp/src/locales/en.json b/airbyte-webapp/src/locales/en.json index 8b80b070301e..34a73e237a08 100644 --- a/airbyte-webapp/src/locales/en.json +++ b/airbyte-webapp/src/locales/en.json @@ -544,7 +544,11 @@ "errorView.unknown": "Unknown", "errorView.unknownError": "Unknown error occurred", - "estimate.syncedThusFar": "estimated records synced", + "estimate.minutesRemaining": "minutes remaining", + "estimate.hoursRemaining": "hours remaining", + "estimate.recordsSynced": "records synced", + "estimate.recordsSyncedThusFar": "records synced thus far", + "estimate.percentComplete": "% complete", "frequency.manual": "Manual", "frequency.cron": "Cron", From f9d510dea14cd380e9fe2f0faeb205be39696ea8 Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 11:31:31 -0700 Subject: [PATCH 12/21] estimate throughput --- .../JobItem/components/MainInfo.tsx | 2 +- .../components/ProgressBar/ProgressBar.tsx | 75 +++++++++++++++---- airbyte-webapp/src/locales/en.json | 4 + 3 files changed, 65 insertions(+), 16 deletions(-) diff --git a/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx b/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx index ccd2550f94e0..09516b7773dd 100644 --- a/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx +++ b/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx @@ -89,7 +89,7 @@ const MainInfo: React.FC = ({ job, attempts = [], isOpen, onExpan
{label}
- +
{attempts.length > 0 && ( <> diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index 36b0c6eca641..a826da53af4d 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -1,10 +1,10 @@ import classNames from "classnames"; import { Line } from "rc-progress"; -import { useIntl } from "react-intl"; +import { useIntl, FormattedMessage } from "react-intl"; import { getJobStatus } from "components/JobItem/JobItem"; -import { AttemptRead, SynchronousJobRead } from "core/request/AirbyteClient"; +import { AttemptRead, JobConfigType, SynchronousJobRead } from "core/request/AirbyteClient"; import Status from "core/statuses"; import { JobsWithJobs } from "pages/ConnectionPage/pages/ConnectionItemPage/JobsList"; @@ -14,12 +14,40 @@ function isJobsWithJobs(job: JobsWithJobs | SynchronousJobRead): job is JobsWith return (job as JobsWithJobs).attempts !== undefined; } -export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) => { +const formatBytes = (bytes?: number) => { + if (!bytes) { + return ; + } + + const k = 1024; + const dm = 2; + const sizes = ["Bytes", "KB", "MB", "GB", "TB"]; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + const result = parseFloat((bytes / Math.pow(k, i)).toFixed(dm)); + + return ; +}; + +export const ProgressBar = ({ + job, + jobConfigType, +}: { + job: JobsWithJobs | SynchronousJobRead; + jobConfigType: JobConfigType; +}) => { const { formatMessage } = useIntl(); - let numerator = 0; - let denominator = 0; - let totalPercent = -1; + if (jobConfigType !== "sync") { + return null; + } + + let numeratorRecords = 0; + let denominatorRecords = 0; + let totalPercentRecords = -1; + let numeratorBytes = 0; + let denominatorBytes = 0; + // let totalPercentBytes = -1; + let elapsedTimeMS = -1; let timeRemainingString = ""; const unEstimatedStreams: string[] = []; let latestAttempt: AttemptRead | undefined; @@ -56,8 +84,10 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) if (!stream.stats.recordsEmitted) { unEstimatedStreams.push(`${stream.streamName}`); } - numerator += stream.stats.recordsEmitted ?? 0; - denominator += stream.stats.estimatedRecords ?? 0; + numeratorRecords += stream.stats.recordsEmitted ?? 0; + denominatorRecords += stream.stats.estimatedRecords ?? 0; + numeratorBytes += stream.stats.bytesEmitted ?? 0; + denominatorBytes += stream.stats.estimatedBytes ?? 0; } } } @@ -65,12 +95,14 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) // TODO... 
maybe } - totalPercent = Math.floor((numerator * 100) / denominator); + totalPercentRecords = Math.floor((numeratorRecords * 100) / denominatorRecords); + // totalPercentBytes = Math.floor((numeratorBytes * 100) / denominatorBytes); + // chose to estimate time remaining based on records rather than bytes if (latestAttempt && latestAttempt.status === Status.RUNNING) { const now = new Date().getTime(); - const elapsedTime = now - latestAttempt.createdAt * 1000; - const timeRemaining = Math.floor(elapsedTime / totalPercent) * (100 - totalPercent); // in ms + elapsedTimeMS = now - latestAttempt.createdAt * 1000; + const timeRemaining = Math.floor(elapsedTimeMS / totalPercentRecords) * (100 - totalPercentRecords); // in ms const minutesRemaining = Math.ceil(timeRemaining / 1000 / 60); const hoursRemaining = Math.ceil(minutesRemaining / 60); if (minutesRemaining <= 60) { @@ -82,14 +114,27 @@ export const ProgressBar = ({ job }: { job: JobsWithJobs | SynchronousJobRead }) return (
- + {latestAttempt?.status === Status.RUNNING && latestAttempt.streamStats && ( <>
- {numerator} / {denominator} {formatMessage({ id: "estimate.recordsSynced" })}{" "} - {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""} + {totalPercentRecords}% | {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""} + {unEstimatedStreams.length > 0 && ( +
+ {unEstimatedStreams.length} {formatMessage({ id: "estimate.unEstimatedStreams" })} +
+ )} +
+
+ {numeratorRecords} / {denominatorRecords} {formatMessage({ id: "estimate.recordsSynced" })} @{" "} + {Math.round((numeratorRecords / elapsedTimeMS) * 1000)} {formatMessage({ id: "estimate.recordsPerSecond" })}
- {unEstimatedStreams.length > 0 &&
{unEstimatedStreams.length} un-estimated streams
} +
+ {formatBytes(numeratorBytes)} / {formatBytes(denominatorBytes)}{" "} + {formatMessage({ id: "estimate.bytesSynced" })} @ {formatBytes((numeratorBytes * 1000) / elapsedTimeMS)} + {formatMessage({ id: "estimate.bytesPerSecond" })} +
+

Stream Stats:
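Editor's note: the throughput figures introduced in this patch are plain rate calculations over the emitted totals, and formatBytes picks a binary unit by magnitude. A standalone sketch of that logic follows; the component's real formatBytes renders a localized FormattedMessage rather than a string, so the string-returning version here is only for illustration.

// Illustrative sketch; the real helper renders a localized element instead of a raw string.
const formatBytes = (bytes?: number): string => {
  if (!bytes) {
    return "0 Bytes";
  }
  const k = 1024;
  const sizes = ["Bytes", "KB", "MB", "GB", "TB"];
  const i = Math.floor(Math.log(bytes) / Math.log(k));
  const value = parseFloat((bytes / Math.pow(k, i)).toFixed(2));
  return `${value} ${sizes[i]}`;
};

// Rates shown next to the synced totals: counts per second derived from elapsed wall-clock time.
const recordsPerSecond = (recordsEmitted: number, elapsedTimeMS: number): number =>
  Math.round((recordsEmitted / elapsedTimeMS) * 1000);

const bytesPerSecondLabel = (bytesEmitted: number, elapsedTimeMS: number): string =>
  `${formatBytes((bytesEmitted * 1000) / elapsedTimeMS)}/sec`;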
diff --git a/airbyte-webapp/src/locales/en.json b/airbyte-webapp/src/locales/en.json index 34a73e237a08..efb2fd0aa78a 100644 --- a/airbyte-webapp/src/locales/en.json +++ b/airbyte-webapp/src/locales/en.json @@ -547,6 +547,10 @@ "estimate.minutesRemaining": "minutes remaining", "estimate.hoursRemaining": "hours remaining", "estimate.recordsSynced": "records synced", + "estimate.recordsPerSecond": "records/sec", + "estimate.bytesSynced": "synced", + "estimate.bytesPerSecond": "/sec", + "estimate.unEstimatedStreams": "un-estimated streams", "estimate.recordsSyncedThusFar": "records synced thus far", "estimate.percentComplete": "% complete", From 15538eff755da88e4ed9443731423a608f34865e Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 11:36:06 -0700 Subject: [PATCH 13/21] Include byte estimates --- .../connectors/source-faker/source_faker/source.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/source-faker/source_faker/source.py b/airbyte-integrations/connectors/source-faker/source_faker/source.py index 57cad86ff7de..b0925820a337 100644 --- a/airbyte-integrations/connectors/source-faker/source_faker/source.py +++ b/airbyte-integrations/connectors/source-faker/source_faker/source.py @@ -139,8 +139,8 @@ def read( records_in_page = 0 users_estimate = count - cursor - yield generate_estimate(stream.stream.name, users_estimate) - yield generate_estimate("Purchases", users_estimate * 2) # a fuzzy guess, some users have purchases, some don't + yield generate_estimate(stream.stream.name, users_estimate, 450) + yield generate_estimate("Purchases", users_estimate * 2, 230) # a fuzzy guess, some users have purchases, some don't for i in range(cursor, count): user = generate_user(person, dt, i) @@ -168,7 +168,7 @@ def read( elif stream.stream.name == "Products": products = generate_products() - yield generate_estimate(stream.stream.name, len(products)) + yield generate_estimate(stream.stream.name, len(products), 180) for p in products: yield generate_record(stream, p) yield generate_state(state, stream, {"product_count": len(products)}) @@ -200,7 +200,7 @@ def generate_record(stream: any, data: any): ) -def generate_estimate(stream_name: str, total: int): +def generate_estimate(stream_name: str, total: int, bytes_per_row: int): # TODO: Use the updated CDK classes when published, e.g. 
`return AirbyteMessage`` data = { @@ -208,7 +208,7 @@ def generate_estimate(stream_name: str, total: int): "trace": { "emitted_at": int(datetime.datetime.now().timestamp() * 1000), "type": "ESTIMATE", - "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": total}, + "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": total, "byte_estimate": total * bytes_per_row}, }, } From 8976ef825b9c4e6218e27c4729506e7bf39501df Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 11:51:48 -0700 Subject: [PATCH 14/21] update display when now estimate is present --- .../server/handlers/JobHistoryHandler.java | 6 ++-- .../components/ProgressBar/ProgressBar.tsx | 35 ++++++++++++------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index 9d51a200028e..6cb5db41d069 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -114,7 +114,7 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept final var totalRecords = 1000; final var totalBytes = 100_000; - for (JobWithAttemptsRead jwar: jobReads) { + for (JobWithAttemptsRead jwar : jobReads) { for (final AttemptRead attempt : jwar.getAttempts()) { final var streamStats = attempt.getStreamStats(); // if this doesn't exist, mock something. @@ -145,7 +145,7 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept s.estimatedRecords(s.getRecordsEmitted()); } } - } + } } return new JobReadList().jobs(jobReads).totalJobCount(totalJobCount); @@ -182,7 +182,7 @@ public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) if (stats.getStats() == null) { stats.stats(new AttemptStats()); } - + final var s = stats.getStats(); final var runningSync = s.getBytesEmitted() == null; diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index a826da53af4d..b2fcd6db1f8f 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -114,23 +114,34 @@ export const ProgressBar = ({ return (
- + {unEstimatedStreams.length === 0 && } {latestAttempt?.status === Status.RUNNING && latestAttempt.streamStats && ( <> + {unEstimatedStreams.length === 0 && ( +
+ {totalPercentRecords}% | {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""} +
+ )} + {unEstimatedStreams.length > 0 && ( +
+ {unEstimatedStreams.length} {formatMessage({ id: "estimate.unEstimatedStreams" })} +
+ )}
- {totalPercentRecords}% | {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""} - {unEstimatedStreams.length > 0 && ( -
- {unEstimatedStreams.length} {formatMessage({ id: "estimate.unEstimatedStreams" })} -
- )} + {numeratorRecords} {unEstimatedStreams.length > 0 ? "" : `/ ${denominatorRecords}`}{" "} + {formatMessage({ id: "estimate.recordsSynced" })} @ {Math.round((numeratorRecords / elapsedTimeMS) * 1000)}{" "} + {formatMessage({ id: "estimate.recordsPerSecond" })}
- {numeratorRecords} / {denominatorRecords} {formatMessage({ id: "estimate.recordsSynced" })} @{" "} - {Math.round((numeratorRecords / elapsedTimeMS) * 1000)} {formatMessage({ id: "estimate.recordsPerSecond" })} -
-
- {formatBytes(numeratorBytes)} / {formatBytes(denominatorBytes)}{" "} + {formatBytes(numeratorBytes)}{" "} + {unEstimatedStreams.length > 0 ? ( + "" + ) : ( + <> + / + {formatBytes(denominatorBytes)} + + )}{" "} {formatMessage({ id: "estimate.bytesSynced" })} @ {formatBytes((numeratorBytes * 1000) / elapsedTimeMS)} {formatMessage({ id: "estimate.bytesPerSecond" })}
From 6867085f8532f77f15b47f60485b10f02df8d69d Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 1 Nov 2022 17:13:07 -0700 Subject: [PATCH 15/21] Backend changes to support saving state mid progress. (#18723) - introduce and implement save_stats route - add estimated columns to sync_stats table - all intermediate persistence layer changes to support this - job persistence tests for general sanity check - job/list and get_debug_info now returns totalStats - per-stream stats to be implement as follow up --- .env | 1 - airbyte-api/src/main/openapi/config.yaml | 33 +++++ .../airbyte/bootloader/BootloaderAppTest.java | 2 +- .../temporal/sync/OrchestratorConstants.java | 1 + .../general/DefaultReplicationWorker.java | 62 ++++++++- .../internal/AirbyteMessageTracker.java | 36 ++++- .../workers/internal/MessageTracker.java | 4 + .../src/main/resources/types/SyncStats.yaml | 6 + ...1__AddEstimatedRecordsAndBytesColumns.java | 34 +++++ .../resources/jobs_database/schema_dump.txt | 2 + .../job/DefaultJobPersistence.java | 103 ++++++++++++-- .../persistence/job/JobPersistence.java | 13 ++ .../job/DefaultJobPersistenceTest.java | 58 ++++++++ .../airbyte_protocol/airbyte_protocol.yaml | 4 +- .../server/apis/AttemptApiController.java | 10 +- .../airbyte/server/apis/ConfigurationApi.java | 6 + .../server/converters/JobConverter.java | 4 +- .../server/handlers/AttemptHandler.java | 15 ++ .../server/handlers/JobHistoryHandler.java | 129 +++++++++--------- .../api/generated-api-html/index.html | 116 ++++++++++++++++ 20 files changed, 550 insertions(+), 89 deletions(-) create mode 100644 airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_001__AddEstimatedRecordsAndBytesColumns.java diff --git a/.env b/.env index fdf824b82731..01a50bb867be 100644 --- a/.env +++ b/.env @@ -81,7 +81,6 @@ JOB_ERROR_REPORTING_STRATEGY=logging # Although not present as an env var, expected by Log4J configuration. LOG_LEVEL=INFO - ### APPLICATIONS ### # Worker # WORKERS_MICRONAUT_ENVIRONMENTS=control-plane diff --git a/airbyte-api/src/main/openapi/config.yaml b/airbyte-api/src/main/openapi/config.yaml index 9ebb9a4f6268..851d9b992914 100644 --- a/airbyte-api/src/main/openapi/config.yaml +++ b/airbyte-api/src/main/openapi/config.yaml @@ -2264,6 +2264,26 @@ paths: application/json: schema: $ref: "#/components/schemas/InternalOperationResult" + /v1/attempt/save_stats: + post: + tags: + - attempt + - internal + summary: For worker to set running attempt stats. 
+ operationId: saveStats + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/SaveStatsRequestBody" + required: true + responses: + "200": + description: Successful Operation + content: + application/json: + schema: + $ref: "#/components/schemas/InternalOperationResult" components: securitySchemes: @@ -4887,6 +4907,19 @@ components: processingTaskQueue: type: string default: "" + SaveStatsRequestBody: + type: object + required: + - jobId + - attemptNumber + - stats + properties: + jobId: + $ref: "#/components/schemas/JobId" + attemptNumber: + $ref: "#/components/schemas/AttemptNumber" + stats: + $ref: "#/components/schemas/AttemptStats" InternalOperationResult: type: object required: diff --git a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java index cf2630467ca4..371488b1098a 100644 --- a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java +++ b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java @@ -136,7 +136,7 @@ void testBootloaderAppBlankDb() throws Exception { bootloader.load(); val jobsMigrator = new JobsDatabaseMigrator(jobDatabase, jobsFlyway); - assertEquals("0.40.14.001", jobsMigrator.getLatestMigration().getVersion().getVersion()); + assertEquals("0.40.17.001", jobsMigrator.getLatestMigration().getVersion().getVersion()); val configsMigrator = new ConfigsDatabaseMigrator(configDatabase, configsFlyway); // this line should change with every new migration diff --git a/airbyte-commons-temporal/src/main/java/io/airbyte/commons/temporal/sync/OrchestratorConstants.java b/airbyte-commons-temporal/src/main/java/io/airbyte/commons/temporal/sync/OrchestratorConstants.java index 7a7c7806d7d0..6fb5efdb332b 100644 --- a/airbyte-commons-temporal/src/main/java/io/airbyte/commons/temporal/sync/OrchestratorConstants.java +++ b/airbyte-commons-temporal/src/main/java/io/airbyte/commons/temporal/sync/OrchestratorConstants.java @@ -46,6 +46,7 @@ public class OrchestratorConstants { EnvConfigs.DD_AGENT_HOST, EnvConfigs.DD_DOGSTATSD_PORT, EnvConfigs.METRIC_CLIENT, + EnvConfigs.INTERNAL_API_HOST, LOG_LEVEL, LogClientSingleton.GCS_LOG_BUCKET, LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java index 6ab4ef51bab7..72fa85754f45 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java @@ -11,7 +11,15 @@ import com.fasterxml.jackson.databind.ObjectMapper; import datadog.trace.api.Trace; +import io.airbyte.api.client.AirbyteApiClient; +import io.airbyte.api.client.invoker.generated.ApiClient; +import io.airbyte.api.client.invoker.generated.ApiException; +import io.airbyte.api.client.model.generated.AttemptStats; +import io.airbyte.api.client.model.generated.SaveStatsRequestBody; import io.airbyte.commons.io.LineGobbler; +import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.EnvConfigs; import io.airbyte.config.FailureReason; import io.airbyte.config.ReplicationAttemptSummary; import io.airbyte.config.ReplicationOutput; @@ -80,6 +88,27 @@ public class DefaultReplicationWorker implements ReplicationWorker { private 
static final Logger LOGGER = LoggerFactory.getLogger(DefaultReplicationWorker.class); + private static final Configs CONFIGS = new EnvConfigs(); + private static final AirbyteApiClient CLIENT = getAirbyteApiClient(); + + // Passing env vars to the container orchestrator isn't working properly. Hack around this for now. + // TODO(Davin): This doesn't work for Kube. Need to figure it out. + private static AirbyteApiClient getAirbyteApiClient() { + if (CONFIGS.getWorkerEnvironment() == WorkerEnvironment.DOCKER) { + return new AirbyteApiClient( + new ApiClient().setScheme("http") + .setHost(CONFIGS.getAirbyteApiHost()) + .setPort(CONFIGS.getAirbyteApiPort()) + .setBasePath("/api")); + } + + return new AirbyteApiClient( + new ApiClient().setScheme("http") + .setHost("airbyte-server-svc") + .setPort(8001) + .setBasePath("/api")); + } + private final String jobId; private final int attempt; private final AirbyteSource source; @@ -180,7 +209,8 @@ public final ReplicationOutput run(final StandardSyncInput syncInput, final Path }); final CompletableFuture replicationThreadFuture = CompletableFuture.runAsync( - getReplicationRunnable(source, destination, cancelled, mapper, messageTracker, mdc, recordSchemaValidator, metricReporter, timeTracker), + getReplicationRunnable(source, destination, cancelled, mapper, messageTracker, mdc, recordSchemaValidator, metricReporter, timeTracker, + Long.parseLong(jobId), attempt), executors) .whenComplete((msg, ex) -> { if (ex != null) { @@ -347,7 +377,9 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, final Map mdc, final RecordSchemaValidator recordSchemaValidator, final WorkerMetricReporter metricReporter, - final ThreadedTimeTracker timeHolder) { + final ThreadedTimeTracker timeHolder, + final Long jobId, + final Integer attemptNumber) { return () -> { MDC.setContextMap(mdc); LOGGER.info("Replication thread started."); @@ -367,8 +399,15 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, validateSchema(recordSchemaValidator, validationErrors, airbyteMessage); final AirbyteMessage message = mapper.mapMessage(airbyteMessage); + // metrics block messageTracker.acceptFromSource(message); + // config/mutating platform state block + if (message.getType() == Type.STATE || message.getType() == Type.TRACE) { + saveStats(messageTracker, jobId, attemptNumber); + } + + // continue processing try { if (message.getType() == Type.RECORD || message.getType() == Type.STATE) { destination.accept(message); @@ -427,6 +466,25 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, }; } + private static void saveStats(MessageTracker messageTracker, Long jobId, Integer attemptNumber) { + final AttemptStats attemptStats = new AttemptStats() + .bytesEmitted(messageTracker.getTotalBytesEmitted()) + .recordsEmitted(messageTracker.getTotalRecordsEmitted()) + .estimatedBytes(messageTracker.getTotalBytesEstimated()) + .estimatedRecords(messageTracker.getTotalRecordsEstimated()); + + final SaveStatsRequestBody saveStatsRequestBody = new SaveStatsRequestBody() + .jobId(jobId) + .attemptNumber(attemptNumber) + .stats(attemptStats); + LOGGER.info("saving stats"); + try { + CLIENT.getAttemptApi().saveStats(saveStatsRequestBody); + } catch (ApiException e) { + LOGGER.warn("error trying to save stats: ", e); + } + } + private static void validateSchema(final RecordSchemaValidator recordSchemaValidator, final Map, Integer>> validationErrors, final AirbyteMessage message) { diff --git 
a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java index 96582131459c..64d2e519109a 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java @@ -25,7 +25,9 @@ import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteTraceMessage; import io.airbyte.workers.helper.FailureHelper; +import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException; import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException; +import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerOomException; import io.airbyte.workers.internal.state_aggregator.DefaultStateAggregator; import io.airbyte.workers.internal.state_aggregator.StateAggregator; import java.time.LocalDateTime; @@ -51,6 +53,10 @@ public class AirbyteMessageTracker implements MessageTracker { private final BiMap streamNameToIndex; private final Map streamToTotalBytesEmitted; private final Map streamToTotalRecordsEmitted; + + private final Map streamToTotalBytesEstimated; + + private final Map streamToTotalRecordsEstimated; private final StateDeltaTracker stateDeltaTracker; private final StateMetricsTracker stateMetricsTracker; private final List destinationErrorTraceMessages; @@ -93,6 +99,8 @@ protected AirbyteMessageTracker(final StateDeltaTracker stateDeltaTracker, this.hashFunction = Hashing.murmur3_32_fixed(); this.streamToTotalBytesEmitted = new HashMap<>(); this.streamToTotalRecordsEmitted = new HashMap<>(); + this.streamToTotalBytesEstimated = new HashMap<>(); + this.streamToTotalRecordsEstimated = new HashMap<>(); this.stateDeltaTracker = stateDeltaTracker; this.stateMetricsTracker = stateMetricsTracker; this.nextStreamIndex = 0; @@ -173,12 +181,12 @@ private void handleSourceEmittedState(final AirbyteStateMessage stateMessage) { if (!unreliableStateTimingMetrics) { stateMetricsTracker.addState(stateMessage, stateHash, timeEmittedStateMessage); } - } catch (final StateDeltaTracker.StateDeltaTrackerException e) { + } catch (final StateDeltaTrackerException e) { log.warn("The message tracker encountered an issue that prevents committed record counts from being reliably computed."); log.warn("This only impacts metadata and does not indicate a problem with actual sync data."); log.warn(e.getMessage(), e); unreliableCommittedCounts = true; - } catch (final StateMetricsTracker.StateMetricsTrackerOomException e) { + } catch (final StateMetricsTrackerOomException e) { log.warn("The StateMetricsTracker encountered an out of memory error that prevents new state metrics from being recorded"); log.warn("This only affects metrics and does not indicate a problem with actual sync data."); unreliableStateTimingMetrics = true; @@ -251,7 +259,7 @@ private void handleEmittedOrchestratorConnectorConfig(final AirbyteControlConnec private void handleEmittedTrace(final AirbyteTraceMessage traceMessage, final ConnectorType connectorType) { switch (traceMessage.getType()) { case ERROR -> handleEmittedErrorTrace(traceMessage, connectorType); - case ESTIMATE -> handleEmittedEstimateTrace(traceMessage, connectorType); + case ESTIMATE -> handleEmittedEstimateTrace(traceMessage); default -> log.warn("Invalid message type for trace message: {}", traceMessage); } } @@ -265,8 
+273,16 @@ private void handleEmittedErrorTrace(final AirbyteTraceMessage errorTraceMessage } @SuppressWarnings("PMD") // until method is implemented - private void handleEmittedEstimateTrace(final AirbyteTraceMessage estimateTraceMessage, final ConnectorType connectorType) { - // TODO! + private void handleEmittedEstimateTrace(final AirbyteTraceMessage estimateTraceMessage) { + // Assume the estimate is a whole number and not a sum i.e. each estimate replaces the previous + // estimate. + + log.info("====== saving trace estimates"); + final var estimate = estimateTraceMessage.getEstimate(); + final var index = getStreamIndex(estimate.getName()); + + streamToTotalRecordsEstimated.put(index, estimate.getRowEstimate()); + streamToTotalBytesEstimated.put(index, estimate.getByteEstimate()); } private short getStreamIndex(final String streamName) { @@ -387,6 +403,11 @@ public long getTotalRecordsEmitted() { return streamToTotalRecordsEmitted.values().stream().reduce(0L, Long::sum); } + @Override + public long getTotalRecordsEstimated() { + return streamToTotalRecordsEstimated.values().stream().reduce(0L, Long::sum); + } + /** * Compute sum of emitted bytes across all streams. */ @@ -395,6 +416,11 @@ public long getTotalBytesEmitted() { return streamToTotalBytesEmitted.values().stream().reduce(0L, Long::sum); } + @Override + public long getTotalBytesEstimated() { + return streamToTotalBytesEstimated.values().stream().reduce(0L, Long::sum); + } + /** * Compute sum of committed record counts across all streams. If the delta tracker has exceeded its * capacity, return empty because committed record counts cannot be reliably computed. diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java index 86994fd785c8..57d5c1320815 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java @@ -81,6 +81,8 @@ public interface MessageTracker { */ long getTotalRecordsEmitted(); + long getTotalRecordsEstimated(); + /** * Get the overall emitted bytes. This includes messages that were emitted by the source, but never * committed by the destination. @@ -89,6 +91,8 @@ public interface MessageTracker { */ long getTotalBytesEmitted(); + long getTotalBytesEstimated(); + /** * Get the overall committed record count. 
* diff --git a/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml b/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml index 6410a3695292..20f4c3b9a6d1 100644 --- a/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml +++ b/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml @@ -47,3 +47,9 @@ properties: destinationWriteEndTime: type: integer description: The exit time of the destination container/pod + estimatedBytes: + type: integer + description: The total estimated number of bytes for the sync + estimatedRecords: + type: integer + description: The total estimated number of records for the sync diff --git a/airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_001__AddEstimatedRecordsAndBytesColumns.java b/airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_001__AddEstimatedRecordsAndBytesColumns.java new file mode 100644 index 000000000000..316a6d514f5d --- /dev/null +++ b/airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_001__AddEstimatedRecordsAndBytesColumns.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.db.instance.jobs.migrations; + +import org.flywaydb.core.api.migration.BaseJavaMigration; +import org.flywaydb.core.api.migration.Context; +import org.jooq.DSLContext; +import org.jooq.impl.DSL; +import org.jooq.impl.SQLDataType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class V0_40_17_001__AddEstimatedRecordsAndBytesColumns extends BaseJavaMigration { + + private static final Logger LOGGER = LoggerFactory.getLogger(V0_40_17_001__AddEstimatedRecordsAndBytesColumns.class); + + @Override + public void migrate(final Context context) throws Exception { + LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); + + // Warning: please do not use any jOOQ generated code to write a migration. + // As database schema changes, the generated jOOQ code can be deprecated. So + // old migration may not compile if there is any generated code. 
+ final DSLContext ctx = DSL.using(context.getConnection()); + ctx.alterTable("sync_stats") + .add( + DSL.field("estimated_records", SQLDataType.BIGINT.nullable(true)), + DSL.field("estimated_bytes", SQLDataType.BIGINT.nullable(true))) + .execute(); + } + +} diff --git a/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt b/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt index 15cd985a9118..aec8a73238a2 100644 --- a/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt +++ b/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt @@ -75,6 +75,8 @@ create table "public"."sync_stats"( "max_seconds_between_state_message_emitted_and_committed" int8 null, "created_at" timestamptz(35) not null default null, "updated_at" timestamptz(35) not null default null, + "estimated_records" int8 null, + "estimated_bytes" int8 null, constraint "sync_stats_pkey" primary key ("id") ); diff --git a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java index 1f5f5b2a2381..aa26572a4879 100644 --- a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java +++ b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java @@ -349,21 +349,42 @@ public void writeOutput(final long jobId, final int attemptNumber, final JobOutp attemptNumber).stream().findFirst(); final Long attemptId = record.get().get("id", Long.class); - ctx.insertInto(SYNC_STATS) - .set(SYNC_STATS.ID, UUID.randomUUID()) - .set(SYNC_STATS.UPDATED_AT, now) - .set(SYNC_STATS.CREATED_AT, now) - .set(SYNC_STATS.ATTEMPT_ID, attemptId) - .set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted()) - .set(SYNC_STATS.RECORDS_EMITTED, syncStats.getRecordsEmitted()) - .set(SYNC_STATS.RECORDS_COMMITTED, syncStats.getRecordsCommitted()) - .set(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED, syncStats.getSourceStateMessagesEmitted()) - .set(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED, syncStats.getDestinationStateMessagesEmitted()) - .set(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMaxSecondsBeforeSourceStateMessageEmitted()) - .set(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMeanSecondsBeforeSourceStateMessageEmitted()) - .set(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMaxSecondsBetweenStateMessageEmittedandCommitted()) - .set(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMeanSecondsBetweenStateMessageEmittedandCommitted()) - .execute(); + final var needToCreate = !ctx.fetchExists(SYNC_STATS, SYNC_STATS.ATTEMPT_ID.eq(attemptId)); + // A record might already created by the writeSyncStats method. + // TODO(Davin): This is ugly and can be removed. 
+ if (needToCreate) { + ctx.insertInto(SYNC_STATS) + .set(SYNC_STATS.ID, UUID.randomUUID()) + .set(SYNC_STATS.ATTEMPT_ID, attemptId) + .set(SYNC_STATS.CREATED_AT, now) + .set(SYNC_STATS.UPDATED_AT, now) + .set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted()) + .set(SYNC_STATS.RECORDS_EMITTED, syncStats.getRecordsEmitted()) + .set(SYNC_STATS.RECORDS_COMMITTED, syncStats.getRecordsCommitted()) + .set(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED, syncStats.getSourceStateMessagesEmitted()) + .set(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED, syncStats.getDestinationStateMessagesEmitted()) + .set(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMaxSecondsBeforeSourceStateMessageEmitted()) + .set(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMeanSecondsBeforeSourceStateMessageEmitted()) + .set(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMaxSecondsBetweenStateMessageEmittedandCommitted()) + .set(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, + syncStats.getMeanSecondsBetweenStateMessageEmittedandCommitted()) + .execute(); + } else { + ctx.update(SYNC_STATS) + .set(SYNC_STATS.UPDATED_AT, now) + .set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted()) + .set(SYNC_STATS.RECORDS_EMITTED, syncStats.getRecordsEmitted()) + .set(SYNC_STATS.RECORDS_COMMITTED, syncStats.getRecordsCommitted()) + .set(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED, syncStats.getSourceStateMessagesEmitted()) + .set(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED, syncStats.getDestinationStateMessagesEmitted()) + .set(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMaxSecondsBeforeSourceStateMessageEmitted()) + .set(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMeanSecondsBeforeSourceStateMessageEmitted()) + .set(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMaxSecondsBetweenStateMessageEmittedandCommitted()) + .set(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, + syncStats.getMeanSecondsBetweenStateMessageEmittedandCommitted()) + .where(SYNC_STATS.ATTEMPT_ID.eq(attemptId)) + .execute(); + } if (normalizationSummary != null) { ctx.insertInto(NORMALIZATION_SUMMARIES) @@ -382,6 +403,47 @@ public void writeOutput(final long jobId, final int attemptNumber, final JobOutp } + @Override + public void writeSyncStats(long jobId, int attemptNumber, long estimatedRecords, long estimatedBytes, long recordsEmitted, long bytesEmitted) + throws IOException { + // Although the attempt table's output has a copy of the sync summary, we do not update it for + // running sync stat updates. + final OffsetDateTime now = OffsetDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC); + jobDatabase.transaction(ctx -> { + final Optional record = + ctx.fetch("SELECT id from attempts where job_id = ? AND attempt_number = ?", jobId, + attemptNumber).stream().findFirst(); + final Long attemptId = record.get().get("id", Long.class); + + final var isExisting = ctx.fetchExists(SYNC_STATS, SYNC_STATS.ATTEMPT_ID.eq(attemptId)); + + if (isExisting) { + ctx.update(SYNC_STATS) + .set(SYNC_STATS.BYTES_EMITTED, bytesEmitted) + .set(SYNC_STATS.RECORDS_EMITTED, recordsEmitted) + .set(SYNC_STATS.ESTIMATED_BYTES, estimatedBytes) + .set(SYNC_STATS.ESTIMATED_RECORDS, estimatedRecords) + .set(SYNC_STATS.UPDATED_AT, now) + .where(SYNC_STATS.ATTEMPT_ID.eq(attemptId)) + .execute(); + return null; + } + + // does this upsert work? 
+ ctx.insertInto(SYNC_STATS) + .set(SYNC_STATS.ID, UUID.randomUUID()) + .set(SYNC_STATS.UPDATED_AT, now) + .set(SYNC_STATS.CREATED_AT, now) + .set(SYNC_STATS.ATTEMPT_ID, attemptId) + .set(SYNC_STATS.BYTES_EMITTED, bytesEmitted) + .set(SYNC_STATS.RECORDS_EMITTED, recordsEmitted) + .set(SYNC_STATS.ESTIMATED_BYTES, estimatedBytes) + .set(SYNC_STATS.ESTIMATED_RECORDS, estimatedRecords) + .execute(); + return null; + }); + } + @Override public void writeAttemptFailureSummary(final long jobId, final int attemptNumber, final AttemptFailureSummary failureSummary) throws IOException { final OffsetDateTime now = OffsetDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC); @@ -403,6 +465,15 @@ public List getSyncStats(final Long attemptId) throws IOException { .toList()); } + @Override + public Long getAttemptId(Long jobId, Long attemptNumber) throws IOException { + final Optional record = + jobDatabase.query(ctx -> ctx.fetch("SELECT id from attempts where job_id = ? AND attempt_number = ?", jobId, + attemptNumber).stream().findFirst()); + + return record.get().get("id", Long.class); + } + @Override public List getNormalizationSummary(final Long attemptId) throws IOException, JsonProcessingException { return jobDatabase @@ -417,6 +488,8 @@ private static RecordMapper getSyncStatsRecordMapper() { .withSourceStateMessagesEmitted(record.get(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED)) .withDestinationStateMessagesEmitted(record.get(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED)) .withRecordsCommitted(record.get(SYNC_STATS.RECORDS_COMMITTED)) + .withEstimatedBytes(record.get(SYNC_STATS.ESTIMATED_BYTES)) + .withEstimatedRecords(record.get(SYNC_STATS.ESTIMATED_RECORDS)) .withMeanSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED)) .withMaxSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED)) .withMeanSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED)) diff --git a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java index e5b389cae2f4..3a3d67b7ae20 100644 --- a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java +++ b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java @@ -36,6 +36,16 @@ public interface JobPersistence { List getSyncStats(Long attemptId) throws IOException; + /** + * Return the id of the record in the attempt table corresponding to that job and attempt combination. This is useful to index into other attempt-scoped + * metadata. + * @param jobId + * @param attemptNumber + * @return + * @throws IOException + */ + Long getAttemptId(Long jobId, Long attemptNumber) throws IOException; + List getNormalizationSummary(Long attemptId) throws IOException; Job getJob(long jobId) throws IOException; @@ -136,6 +146,9 @@ public interface JobPersistence { */ void writeOutput(long jobId, int attemptNumber, JobOutput output) throws IOException; + void writeSyncStats(long jobId, int attemptNumber, long estimatedRecords, long estimatedBytes, long recordsEmitted, long bytesEmitted) + throws IOException; + /** * Writes a summary of all failures that occurred during the attempt. 
* diff --git a/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java b/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java index 06cdf5c4a2c5..fa221d512e5c 100644 --- a/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java +++ b/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java @@ -308,6 +308,64 @@ void testWriteOutput() throws IOException, SQLException { assertEquals(List.of(failureReason1, failureReason2), storedNormalizationSummary.getFailures()); } + @Nested + class SyncStatsTest { + + @Test + @DisplayName("Writing sync stats the first time should only write record and bytes information correctly") + void testWriteSyncStatsFirst() throws IOException, SQLException { + final long jobId = jobPersistence.enqueueJob(SCOPE, SPEC_JOB_CONFIG).orElseThrow(); + final int attemptNumber = jobPersistence.createAttempt(jobId, LOG_PATH); + jobPersistence.writeSyncStats(jobId, attemptNumber, 1000, 1000, 1000, 1000); + + final Optional record = + jobDatabase.query(ctx -> ctx.fetch("SELECT id from attempts where job_id = ? AND attempt_number = ?", jobId, + attemptNumber).stream().findFirst()); + final Long attemptId = record.get().get("id", Long.class); + + final var stats = jobPersistence.getSyncStats(attemptId).stream().findFirst().get(); + assertEquals(1000, stats.getBytesEmitted()); + assertEquals(1000, stats.getRecordsEmitted()); + assertEquals(1000, stats.getEstimatedBytes()); + assertEquals(1000, stats.getEstimatedRecords()); + + assertEquals(null, stats.getRecordsCommitted()); + assertEquals(null, stats.getDestinationStateMessagesEmitted()); + } + + @Test + @DisplayName("Writing sync stats multiple times should write record and bytes information correctly without exceptions") + void testWriteSyncStatsRepeated() throws IOException, SQLException { + final long jobId = jobPersistence.enqueueJob(SCOPE, SPEC_JOB_CONFIG).orElseThrow(); + final int attemptNumber = jobPersistence.createAttempt(jobId, LOG_PATH); + + jobPersistence.writeSyncStats(jobId, attemptNumber, 1000, 1000, 1000, 1000); + + final Optional record = + jobDatabase.query(ctx -> ctx.fetch("SELECT id from attempts where job_id = ? 
AND attempt_number = ?", jobId, + attemptNumber).stream().findFirst()); + final Long attemptId = record.get().get("id", Long.class); + + var stat = jobPersistence.getSyncStats(attemptId).stream().findFirst().get(); + assertEquals(1000, stat.getBytesEmitted()); + assertEquals(1000, stat.getRecordsEmitted()); + assertEquals(1000, stat.getEstimatedBytes()); + assertEquals(1000, stat.getEstimatedRecords()); + + jobPersistence.writeSyncStats(jobId, attemptNumber, 2000, 2000, 2000, 2000); + var stats = jobPersistence.getSyncStats(attemptId); + assertEquals(1, stats.size()); + + stat = stats.stream().findFirst().get(); + assertEquals(2000, stat.getBytesEmitted()); + assertEquals(2000, stat.getRecordsEmitted()); + assertEquals(2000, stat.getEstimatedBytes()); + assertEquals(2000, stat.getEstimatedRecords()); + + } + + } + @Test @DisplayName("Should be able to read attemptFailureSummary that was written") void testWriteAttemptFailureSummary() throws IOException { diff --git a/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml b/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml index 22eaf29ced57..8c60e9b6e0a6 100644 --- a/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml +++ b/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml @@ -226,10 +226,10 @@ definitions: type: string row_estimate: description: The estimated number of rows to be emitted by this sync for this stream - type: number + type: integer byte_estimate: description: The estimated number of bytes to be emitted by this sync for this stream - type: number + type: integer AirbyteControlMessage: type: object additionalProperties: true diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/AttemptApiController.java b/airbyte-server/src/main/java/io/airbyte/server/apis/AttemptApiController.java index 71274154cca4..f2b067023e66 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/AttemptApiController.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/AttemptApiController.java @@ -6,11 +6,12 @@ import io.airbyte.api.generated.AttemptApi; import io.airbyte.api.model.generated.InternalOperationResult; +import io.airbyte.api.model.generated.SaveStatsRequestBody; import io.airbyte.api.model.generated.SetWorkflowInAttemptRequestBody; import io.airbyte.server.handlers.AttemptHandler; import javax.ws.rs.Path; -@Path("/v1/attempt/set_workflow_in_attempt") +@Path("/v1/attempt/") public class AttemptApiController implements AttemptApi { private final AttemptHandler attemptHandler; @@ -20,6 +21,13 @@ public AttemptApiController(final AttemptHandler attemptHandler) { } @Override + // @Path("/v1/attempt/save_stats") + public InternalOperationResult saveStats(SaveStatsRequestBody requestBody) { + return ConfigurationApi.execute(() -> attemptHandler.saveStats(requestBody)); + } + + @Override + // @Path("/v1/attempt/set_workflow_in_attempt") public InternalOperationResult setWorkflowInAttempt(final SetWorkflowInAttemptRequestBody requestBody) { return ConfigurationApi.execute(() -> attemptHandler.setWorkflowInAttempt(requestBody)); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 6bc240c92a41..e11cb1182267 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ 
b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -64,6 +64,7 @@ import io.airbyte.api.model.generated.PrivateDestinationDefinitionReadList; import io.airbyte.api.model.generated.PrivateSourceDefinitionRead; import io.airbyte.api.model.generated.PrivateSourceDefinitionReadList; +import io.airbyte.api.model.generated.SaveStatsRequestBody; import io.airbyte.api.model.generated.SetInstancewideDestinationOauthParamsRequestBody; import io.airbyte.api.model.generated.SetInstancewideSourceOauthParamsRequestBody; import io.airbyte.api.model.generated.SetWorkflowInAttemptRequestBody; @@ -375,6 +376,11 @@ public void revokeSourceDefinitionFromWorkspace(final SourceDefinitionIdWithWork }); } + @Override + public InternalOperationResult saveStats(SaveStatsRequestBody saveStatsRequestBody) { + return null; + } + // SOURCE SPECIFICATION @Override diff --git a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java index 52c28f3640f1..1cf0b25e9be7 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java @@ -172,7 +172,9 @@ private static AttemptStats getTotalAttemptStats(final Attempt attempt) { .bytesEmitted(totalStats.getBytesEmitted()) .recordsEmitted(totalStats.getRecordsEmitted()) .stateMessagesEmitted(totalStats.getSourceStateMessagesEmitted()) - .recordsCommitted(totalStats.getRecordsCommitted()); + .recordsCommitted(totalStats.getRecordsCommitted()) + .estimatedRecords(totalStats.getEstimatedRecords()) + .estimatedBytes(totalStats.getEstimatedBytes()); } private static List getAttemptStreamStats(final Attempt attempt) { diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java index 83f86861d6b3..149c0c16b1c8 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java @@ -5,6 +5,7 @@ package io.airbyte.server.handlers; import io.airbyte.api.model.generated.InternalOperationResult; +import io.airbyte.api.model.generated.SaveStatsRequestBody; import io.airbyte.api.model.generated.SetWorkflowInAttemptRequestBody; import io.airbyte.persistence.job.JobPersistence; import java.io.IOException; @@ -33,4 +34,18 @@ public InternalOperationResult setWorkflowInAttempt( return new InternalOperationResult().succeeded(true); } + public InternalOperationResult saveStats(SaveStatsRequestBody requestBody) { + try { + // This is for the entire sync for now. 
+ final var stats = requestBody.getStats(); + jobPersistence.writeSyncStats(requestBody.getJobId(), requestBody.getAttemptNumber(), + stats.getEstimatedRecords(), stats.getEstimatedBytes(), stats.getRecordsEmitted(), stats.getBytesEmitted()); + } catch (IOException ioe) { + LOGGER.error("IOException when setting temporal workflow in attempt;", ioe); + return new InternalOperationResult().succeeded(false); + } + + return new InternalOperationResult().succeeded(true); + } + } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index 6cb5db41d069..30890c133f2a 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -36,6 +36,7 @@ import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.persistence.job.JobPersistence; +import io.airbyte.persistence.job.models.Attempt; import io.airbyte.persistence.job.models.Job; import io.airbyte.persistence.job.models.JobStatus; import io.airbyte.server.converters.JobConverter; @@ -110,41 +111,44 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept .map(JobConverter::getJobWithAttemptsRead) .collect(Collectors.toList()); - // Mock out this data for now. - final var totalRecords = 1000; - final var totalBytes = 100_000; - for (JobWithAttemptsRead jwar : jobReads) { - for (final AttemptRead attempt : jwar.getAttempts()) { - final var streamStats = attempt.getStreamStats(); - // if this doesn't exist, mock something. - if (streamStats == null) { - final var stats = List.of(new AttemptStreamStats().streamName("foo stream"), new AttemptStreamStats().streamName("bar stream")); - attempt.streamStats(stats); + for (final AttemptRead a : jwar.getAttempts()) { + // I need the job + final var attemptId = jobPersistence.getAttemptId(jwar.getJob().getId(), a.getId()); + + final var syncStatList = jobPersistence.getSyncStats(attemptId); + if (syncStatList.size() == 0) { + // there should only be one returned. + continue; } - for (final AttemptStreamStats stats : attempt.getStreamStats()) { - if (stats.getStats() == null) { - stats.stats(new AttemptStats()); - } - - final var s = stats.getStats(); - final var runningSync = s.getBytesEmitted() == null; - - // if the sync is not done, this is empty, so we mock it out now. - if (runningSync) { - s.bytesEmitted(RANDOM.nextLong(totalBytes)); - s.recordsEmitted(RANDOM.nextLong(totalRecords)); - - // Set estimate to a random buffer of the estimated to show a 'progress-like' bar. - s.estimatedBytes(s.getBytesEmitted() * RANDOM.nextLong(2, 5)); - s.estimatedRecords(s.getRecordsEmitted() * RANDOM.nextLong(2, 5)); - } else { - // if it's done, set to the correct number. - s.estimatedBytes(s.getBytesEmitted()); - s.estimatedRecords(s.getRecordsEmitted()); - } + final var syncStat = jobPersistence.getSyncStats(attemptId).get(0); + if (a.getTotalStats() == null) { + a.setTotalStats(new AttemptStats()); } + + // total stats + a.getTotalStats() + .estimatedBytes(syncStat.getEstimatedBytes()) + .estimatedRecords(syncStat.getEstimatedRecords()) + .bytesEmitted(syncStat.getBytesEmitted()) + .recordsEmitted(syncStat.getRecordsEmitted()); + +// final var streamStats = attempt.getStreamStats(); +// // if this doesn't exist, mock something. 
+// +// if (streamStats != null) { +// for (final AttemptStreamStats stats : attempt.getStreamStats()) { +// if (stats.getStats() == null) { +// stats.stats(new AttemptStats()); +// } +// +// final var s = stats.getStats(); +// s.estimatedBytes(s.getBytesEmitted()); +// s.estimatedRecords(s.getRecordsEmitted()); +// } +// } + } } @@ -166,39 +170,42 @@ public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) final Job job = jobPersistence.getJob(jobIdRequestBody.getId()); final JobInfoRead jobinfoRead = jobConverter.getJobInfoRead(job); - // Mock out this data for now. - final var totalRecords = 1000; - final var totalBytes = 100_000; - - for (final AttemptInfoRead attempt : jobinfoRead.getAttempts()) { - final var streamStats = attempt.getAttempt().getStreamStats(); - // if this doesn't exist, mock something. - if (streamStats == null) { - final var stats = List.of(new AttemptStreamStats().streamName("foo stream"), new AttemptStreamStats().streamName("bar stream")); - attempt.getAttempt().streamStats(stats); - } - - for (final AttemptStreamStats stats : attempt.getAttempt().getStreamStats()) { - if (stats.getStats() == null) { - stats.stats(new AttemptStats()); + // jobConverter is pulling from the sync summary, so either we write to the sync summary, or we pull the information directly from + // the table while the job is running. + // if it's not running, we no longer need to do this. + if (job.getStatus() == JobStatus.RUNNING) { + for (final AttemptInfoRead a : jobinfoRead.getAttempts()) { + + final var attemptId = jobPersistence.getAttemptId(job.getId(), a.getAttempt().getId()); + final var syncStatList = jobPersistence.getSyncStats(attemptId); + if (syncStatList.size() == 0) { + // there should only be one returned. + continue; } - final var s = stats.getStats(); - final var runningSync = s.getBytesEmitted() == null; - - // if the sync is not done, this is empty, so we mock it out now. - if (runningSync) { - s.bytesEmitted(RANDOM.nextLong(totalBytes)); - s.recordsEmitted(RANDOM.nextLong(totalRecords)); - - // Set estimate to a random buffer of the estimated to show a 'progress-like' bar. - s.estimatedBytes(s.getBytesEmitted() * RANDOM.nextLong(2, 5)); - s.estimatedRecords(s.getRecordsEmitted() * RANDOM.nextLong(2, 5)); - } else { - // if it's done, set to the correct number. - s.estimatedBytes(s.getBytesEmitted()); - s.estimatedRecords(s.getRecordsEmitted()); + final var syncStat = jobPersistence.getSyncStats(attemptId).get(0); + if (a.getAttempt().getTotalStats() == null) { + a.getAttempt().setTotalStats(new AttemptStats()); } + + // total stats + a.getAttempt().getTotalStats() + .estimatedBytes(syncStat.getEstimatedBytes()) + .estimatedRecords(syncStat.getEstimatedRecords()) + .bytesEmitted(syncStat.getBytesEmitted()) + .recordsEmitted(syncStat.getRecordsEmitted()); + + // stream stats +// for (final AttemptStreamStats stats : a.getAttempt().getStreamStats()) { +// if (stats.getStats() == null) { +// stats.stats(new AttemptStats()); +// } +// +// final var s = stats.getStats(); +// s.estimatedBytes(s.getBytesEmitted()); +// s.estimatedRecords(s.getRecordsEmitted()); +// +// } } } diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index 32af948a25e1..7429edf4d653 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -221,6 +221,7 @@

Table of Contents

Attempt

Connection

@@ -281,6 +282,7 @@

Internal

Jobs

@@ -390,6 +392,58 @@

Workspace

Attempt

+
+
+ Up +
post /v1/attempt/save_stats
+
For worker to set running attempt stats. (saveStats)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
SaveStatsRequestBody SaveStatsRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "succeeded" : true
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful Operation + InternalOperationResult +
+
Up @@ -4004,6 +4058,58 @@

200

AttemptNormalizationStatusReadList

+
+
+ Up +
post /v1/attempt/save_stats
+
For worker to set running attempt stats. (saveStats)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
SaveStatsRequestBody SaveStatsRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "succeeded" : true
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful Operation + InternalOperationResult +
+
+
+

SaveStatsRequestBody - Up

+
+
+
jobId
Long format: int64
+
attemptNumber
Integer format: int32
+
stats
+
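Editor's note: for readers skimming the generated docs, the new request body reduces to the TypeScript shape sketched below. This is an inferred rendering, not generated client code; the optionality of the stats fields is an assumption based on how AttemptStats is populated elsewhere in this patch, and the field names mirror the worker-side builder calls (recordsEmitted, bytesEmitted, estimatedRecords, estimatedBytes).

// Inferred shape of the save_stats payload; not part of the generated API client.
interface SaveStatsAttemptStats {
  recordsEmitted?: number;
  bytesEmitted?: number;
  estimatedRecords?: number;
  estimatedBytes?: number;
}

interface SaveStatsRequestBody {
  jobId: number; // int64 job id
  attemptNumber: number; // int32 attempt number within the job
  stats: SaveStatsAttemptStats;
}

// Example payload as the worker might send it mid-sync (values are made up):
const examplePayload: SaveStatsRequestBody = {
  jobId: 123,
  attemptNumber: 0,
  stats: { recordsEmitted: 500, bytesEmitted: 64000, estimatedRecords: 1000, estimatedBytes: 128000 },
};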
+

SchemaChange - Up

From 109411c648faa665d06756c20537022fbdd84663 Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 17:40:44 -0700 Subject: [PATCH 16/21] support for just displaying totals --- .../components/ProgressBar/ProgressBar.tsx | 80 ++++++++++++------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index b2fcd6db1f8f..e5478aae69b0 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -41,11 +41,11 @@ export const ProgressBar = ({ return null; } - let numeratorRecords = 0; - let denominatorRecords = 0; + let numeratorRecords = -1; + let denominatorRecords = -1; let totalPercentRecords = -1; - let numeratorBytes = 0; - let denominatorBytes = 0; + let numeratorBytes = -1; + let denominatorBytes = -1; // let totalPercentBytes = -1; let elapsedTimeMS = -1; let timeRemainingString = ""; @@ -79,15 +79,31 @@ export const ProgressBar = ({ if (isJobsWithJobs(job)) { if (job.attempts) { latestAttempt = job.attempts[job.attempts?.length - 1]; + let countTotalsFromStreams = true; + if ( + latestAttempt.totalStats?.recordsEmitted && + latestAttempt.totalStats?.estimatedRecords && + latestAttempt.totalStats?.bytesEmitted && + latestAttempt.totalStats?.estimatedBytes + ) { + countTotalsFromStreams = false; + numeratorRecords = latestAttempt.totalStats.recordsEmitted; + denominatorRecords = latestAttempt.totalStats.estimatedRecords; + numeratorBytes = latestAttempt.totalStats.bytesEmitted; + denominatorBytes = latestAttempt.totalStats.estimatedBytes; + } + if (latestAttempt && !latestAttempt.totalStats && latestAttempt.streamStats) { for (const stream of latestAttempt.streamStats) { if (!stream.stats.recordsEmitted) { unEstimatedStreams.push(`${stream.streamName}`); } - numeratorRecords += stream.stats.recordsEmitted ?? 0; - denominatorRecords += stream.stats.estimatedRecords ?? 0; - numeratorBytes += stream.stats.bytesEmitted ?? 0; - denominatorBytes += stream.stats.estimatedBytes ?? 0; + if (countTotalsFromStreams) { + numeratorRecords += stream.stats.recordsEmitted ?? 0; + denominatorRecords += stream.stats.estimatedRecords ?? 0; + numeratorBytes += stream.stats.bytesEmitted ?? 0; + denominatorBytes += stream.stats.estimatedBytes ?? 0; + } } } } @@ -112,10 +128,12 @@ export const ProgressBar = ({ } } + console.log({ unEstimatedStreams, numeratorRecords, denominatorRecords, totalPercentRecords, timeRemainingString }); + return (
{unEstimatedStreams.length === 0 && } - {latestAttempt?.status === Status.RUNNING && latestAttempt.streamStats && ( + {latestAttempt?.status === Status.RUNNING && ( <> {unEstimatedStreams.length === 0 && (
@@ -146,27 +164,29 @@ export const ProgressBar = ({ {formatMessage({ id: "estimate.bytesPerSecond" })}
-
-
-
Stream Stats:
- {latestAttempt.streamStats?.map((stream, idx) => { - const localNumerator = stream.stats.recordsEmitted; - const localDenominator = stream.stats.estimatedRecords; - - return ( -
- {stream.streamName} -{" "} - {localNumerator && localDenominator - ? `${Math.round((localNumerator * 100) / localDenominator)}${formatMessage({ - id: "estimate.percentComplete", - })} (${localNumerator} / ${localDenominator} ${formatMessage({ - id: "estimate.recordsSynced", - })})` - : `${localNumerator} ${formatMessage({ id: "estimate.recordsSyncedThusFar" })} (no estimate)`} -
- ); - })} -
+ {latestAttempt.streamStats && ( +
+
+
Stream Stats:
+ {latestAttempt.streamStats?.map((stream, idx) => { + const localNumerator = stream.stats.recordsEmitted; + const localDenominator = stream.stats.estimatedRecords; + + return ( +
+ {stream.streamName} -{" "} + {localNumerator && localDenominator + ? `${Math.round((localNumerator * 100) / localDenominator)}${formatMessage({ + id: "estimate.percentComplete", + })} (${localNumerator} / ${localDenominator} ${formatMessage({ + id: "estimate.recordsSynced", + })})` + : `${localNumerator} ${formatMessage({ id: "estimate.recordsSyncedThusFar" })} (no estimate)`} +
+ ); + })} +
+ )} )}
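
Restated outside of JSX, the accumulation introduced above behaves roughly like the sketch below: prefer the attempt-level totalStats when all four totals are present, otherwise sum whatever each stream reported. The trimmed-down Stats shape is an assumption for illustration only, not the real API model.

// Simplified sketch of the totals-vs-per-stream fallback; not the component code itself.
interface Stats {
  recordsEmitted?: number;
  estimatedRecords?: number;
  bytesEmitted?: number;
  estimatedBytes?: number;
}

const computeTotals = (totalStats: Stats | undefined, streamStats: Array<{ streamName: string; stats: Stats }>): Stats => {
  if (totalStats?.recordsEmitted && totalStats?.estimatedRecords && totalStats?.bytesEmitted && totalStats?.estimatedBytes) {
    // The attempt already carries complete totals; use them directly.
    return totalStats;
  }
  // Otherwise accumulate per-stream values, treating missing numbers as zero.
  return streamStats.reduce<Required<Stats>>(
    (acc, s) => ({
      recordsEmitted: acc.recordsEmitted + (s.stats.recordsEmitted ?? 0),
      estimatedRecords: acc.estimatedRecords + (s.stats.estimatedRecords ?? 0),
      bytesEmitted: acc.bytesEmitted + (s.stats.bytesEmitted ?? 0),
      estimatedBytes: acc.estimatedBytes + (s.stats.estimatedBytes ?? 0),
    }),
    { recordsEmitted: 0, estimatedRecords: 0, bytesEmitted: 0, estimatedBytes: 0 }
  );
};
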
From e19f7270dcaa9a8687a1b0396aa5988eb13ce4cd Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 17:55:47 -0700 Subject: [PATCH 17/21] display progressive enhancment --- .../components/ProgressBar/ProgressBar.tsx | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index e5478aae69b0..e82f54ea5752 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -14,6 +14,10 @@ function isJobsWithJobs(job: JobsWithJobs | SynchronousJobRead): job is JobsWith return (job as JobsWithJobs).attempts !== undefined; } +const formatBigNumber = (num: number) => { + return num.toLocaleString(); +}; + const formatBytes = (bytes?: number) => { if (!bytes) { return ; @@ -48,6 +52,7 @@ export const ProgressBar = ({ let denominatorBytes = -1; // let totalPercentBytes = -1; let elapsedTimeMS = -1; + let timeRemaining = -1; let timeRemainingString = ""; const unEstimatedStreams: string[] = []; let latestAttempt: AttemptRead | undefined; @@ -111,14 +116,13 @@ export const ProgressBar = ({ // TODO... maybe } - totalPercentRecords = Math.floor((numeratorRecords * 100) / denominatorRecords); - // totalPercentBytes = Math.floor((numeratorBytes * 100) / denominatorBytes); + totalPercentRecords = denominatorRecords > 0 ? Math.floor((numeratorRecords * 100) / denominatorRecords) : 0; // chose to estimate time remaining based on records rather than bytes if (latestAttempt && latestAttempt.status === Status.RUNNING) { const now = new Date().getTime(); elapsedTimeMS = now - latestAttempt.createdAt * 1000; - const timeRemaining = Math.floor(elapsedTimeMS / totalPercentRecords) * (100 - totalPercentRecords); // in ms + timeRemaining = Math.floor(elapsedTimeMS / totalPercentRecords) * (100 - totalPercentRecords); // in ms const minutesRemaining = Math.ceil(timeRemaining / 1000 / 60); const hoursRemaining = Math.ceil(minutesRemaining / 60); if (minutesRemaining <= 60) { @@ -137,7 +141,7 @@ export const ProgressBar = ({ <> {unEstimatedStreams.length === 0 && (
- {totalPercentRecords}% | {timeRemainingString.length > 0 ? ` ~ ${timeRemainingString}` : ""} + {totalPercentRecords}% {timeRemaining < Infinity && timeRemaining > 0 ? `| ~${timeRemainingString}` : ""}
)} {unEstimatedStreams.length > 0 && ( @@ -145,24 +149,30 @@ export const ProgressBar = ({ {unEstimatedStreams.length} {formatMessage({ id: "estimate.unEstimatedStreams" })}
)} -
- {numeratorRecords} {unEstimatedStreams.length > 0 ? "" : `/ ${denominatorRecords}`}{" "} - {formatMessage({ id: "estimate.recordsSynced" })} @ {Math.round((numeratorRecords / elapsedTimeMS) * 1000)}{" "} - {formatMessage({ id: "estimate.recordsPerSecond" })} -
-
- {formatBytes(numeratorBytes)}{" "} - {unEstimatedStreams.length > 0 ? ( - "" - ) : ( - <> - / - {formatBytes(denominatorBytes)} - - )}{" "} - {formatMessage({ id: "estimate.bytesSynced" })} @ {formatBytes((numeratorBytes * 1000) / elapsedTimeMS)} - {formatMessage({ id: "estimate.bytesPerSecond" })} -
+ {denominatorRecords > 0 && ( + <> +
+ {formatBigNumber(numeratorRecords)}{" "} + {unEstimatedStreams.length > 0 ? "" : `/ ${formatBigNumber(denominatorRecords)}`}{" "} + {formatMessage({ id: "estimate.recordsSynced" })} @{" "} + {Math.round((numeratorRecords / elapsedTimeMS) * 1000)}{" "} + {formatMessage({ id: "estimate.recordsPerSecond" })} +
+
+ {formatBytes(numeratorBytes)}{" "} + {unEstimatedStreams.length > 0 ? ( + "" + ) : ( + <> + / + {formatBytes(denominatorBytes)} + + )}{" "} + {formatMessage({ id: "estimate.bytesSynced" })} @ {formatBytes((numeratorBytes * 1000) / elapsedTimeMS)} + {formatMessage({ id: "estimate.bytesPerSecond" })} +
+ + )} {latestAttempt.streamStats && (
@@ -178,7 +188,7 @@ export const ProgressBar = ({ {localNumerator && localDenominator ? `${Math.round((localNumerator * 100) / localDenominator)}${formatMessage({ id: "estimate.percentComplete", - })} (${localNumerator} / ${localDenominator} ${formatMessage({ + })} (${formatBigNumber(localNumerator)} / ${formatBigNumber(localDenominator)} ${formatMessage({ id: "estimate.recordsSynced", })})` : `${localNumerator} ${formatMessage({ id: "estimate.recordsSyncedThusFar" })} (no estimate)`} From 1f492ca946d600542f34efecaa6be600ed78319e Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 1 Nov 2022 18:16:25 -0700 Subject: [PATCH 18/21] Save and return per stream stats. (#18834) * Add the streamStats variable to the api route and implement replication worker sending of stream stats. * Implement migration to add JSONB streamStats column to SyncStats table. * Implement writing of stream stats to db. * Implement this for per-stream. --- airbyte-api/src/main/openapi/config.yaml | 4 ++ .../general/DefaultReplicationWorker.java | 17 +++++- .../internal/AirbyteMessageTracker.java | 14 +++++ .../workers/internal/MessageTracker.java | 4 ++ .../src/main/resources/types/SyncStats.yaml | 4 ++ .../src/main/resources/seed/source_specs.yaml | 56 +++++++++---------- ..._002__AddStreamStatsColumnToSyncStats.java | 34 +++++++++++ .../resources/jobs_database/schema_dump.txt | 17 ++++++ .../job/DefaultJobPersistence.java | 46 +++++++++++---- .../persistence/job/JobPersistence.java | 14 ++++- .../job/DefaultJobPersistenceTest.java | 6 +- .../server/handlers/AttemptHandler.java | 16 +++++- .../server/handlers/JobHistoryHandler.java | 54 +++++++++--------- .../api/generated-api-html/index.html | 1 + 14 files changed, 211 insertions(+), 76 deletions(-) create mode 100644 airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_002__AddStreamStatsColumnToSyncStats.java diff --git a/airbyte-api/src/main/openapi/config.yaml b/airbyte-api/src/main/openapi/config.yaml index 851d9b992914..67d9f25af6c5 100644 --- a/airbyte-api/src/main/openapi/config.yaml +++ b/airbyte-api/src/main/openapi/config.yaml @@ -4920,6 +4920,10 @@ components: $ref: "#/components/schemas/AttemptNumber" stats: $ref: "#/components/schemas/AttemptStats" + streamStats: + type: array + items: + $ref: "#/components/schemas/AttemptStreamStats" InternalOperationResult: type: object required: diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java index 72fa85754f45..61b457635aa6 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java @@ -15,6 +15,7 @@ import io.airbyte.api.client.invoker.generated.ApiClient; import io.airbyte.api.client.invoker.generated.ApiException; import io.airbyte.api.client.model.generated.AttemptStats; +import io.airbyte.api.client.model.generated.AttemptStreamStats; import io.airbyte.api.client.model.generated.SaveStatsRequestBody; import io.airbyte.commons.io.LineGobbler; import io.airbyte.config.Configs; @@ -467,16 +468,28 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, } private static void saveStats(MessageTracker messageTracker, Long jobId, Integer attemptNumber) { - final AttemptStats attemptStats = new AttemptStats() + final AttemptStats totalStats = new AttemptStats() 
.bytesEmitted(messageTracker.getTotalBytesEmitted()) .recordsEmitted(messageTracker.getTotalRecordsEmitted()) .estimatedBytes(messageTracker.getTotalBytesEstimated()) .estimatedRecords(messageTracker.getTotalRecordsEstimated()); + // calculate per stream stats + List streamStats = messageTracker.getStreamToEstimatedBytes().keySet().stream().map(stream -> { + final var syncStats = new AttemptStats() + .recordsEmitted(messageTracker.getStreamToEmittedRecords().get(stream)) + .bytesEmitted(messageTracker.getStreamToEmittedBytes().get(stream)) + .estimatedBytes(messageTracker.getStreamToEstimatedBytes().get(stream)) + .estimatedRecords(messageTracker.getStreamToEstimatedRecords().get(stream)); + + return new AttemptStreamStats().streamName(stream).stats(syncStats); + }).collect(Collectors.toList());; + final SaveStatsRequestBody saveStatsRequestBody = new SaveStatsRequestBody() .jobId(jobId) .attemptNumber(attemptNumber) - .stats(attemptStats); + .stats(totalStats) + .streamStats(streamStats); LOGGER.info("saving stats"); try { CLIENT.getAttemptApi().saveStats(saveStatsRequestBody); diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java index 64d2e519109a..6afb755a49b0 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java @@ -385,6 +385,13 @@ public Map getStreamToEmittedRecords() { Map.Entry::getValue)); } + @Override + public Map getStreamToEstimatedRecords() { + return streamToTotalRecordsEstimated.entrySet().stream().collect(Collectors.toMap( + entry -> streamNameToIndex.inverse().get(entry.getKey()), + Map.Entry::getValue)); + } + /** * Swap out stream indices for stream names and return total bytes emitted by stream. */ @@ -395,6 +402,13 @@ public Map getStreamToEmittedBytes() { Map.Entry::getValue)); } + @Override + public Map getStreamToEstimatedBytes() { + return streamToTotalBytesEstimated.entrySet().stream().collect(Collectors.toMap( + entry -> streamNameToIndex.inverse().get(entry.getKey()), + Map.Entry::getValue)); + } + /** * Compute sum of emitted record counts across all streams. */ diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java index 57d5c1320815..a76de0c1a6cf 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java @@ -65,6 +65,8 @@ public interface MessageTracker { */ Map getStreamToEmittedRecords(); + Map getStreamToEstimatedRecords(); + /** * Get the per-stream emitted byte count. This includes messages that were emitted by the source, * but never committed by the destination. @@ -73,6 +75,8 @@ public interface MessageTracker { */ Map getStreamToEmittedBytes(); + Map getStreamToEstimatedBytes(); + /** * Get the overall emitted record count. This includes messages that were emitted by the source, but * never committed by the destination. 
diff --git a/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml b/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml index 20f4c3b9a6d1..0c60c14f09aa 100644 --- a/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml +++ b/airbyte-config/config-models/src/main/resources/types/SyncStats.yaml @@ -53,3 +53,7 @@ properties: estimatedRecords: type: integer description: The total estimated number of records for the sync + streamStats: + type: array + items: + "$ref": StreamSyncStats.yaml diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 8c82c3ccdcc6..eaea5a925336 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2288,48 +2288,48 @@ supported_destination_sync_modes: [] - dockerImage: "airbyte/source-coinmarketcap:0.1.0" spec: - documentationUrl: https://docs.airbyte.com/integrations/sources/coinmarketcap + documentationUrl: "https://docs.airbyte.com/integrations/sources/coinmarketcap" connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Coinmarketcap Spec - type: object + $schema: "http://json-schema.org/draft-07/schema#" + title: "Coinmarketcap Spec" + type: "object" required: - - api_key - - data_type + - "api_key" + - "data_type" additionalProperties: true properties: api_key: - title: API Key - type: string - description: >- - Your API Key. See here. The token is - case sensitive. + title: "API Key" + type: "string" + description: "Your API Key. See here. The token is case sensitive." airbyte_secret: true data_type: - title: Data type - type: string + title: "Data type" + type: "string" enum: - - latest - - historical - description: >- - /latest: Latest market ticker quotes and averages for cryptocurrencies and exchanges. - /historical: Intervals of historic market data like OHLCV data or data for use in charting libraries. See here. + - "latest" + - "historical" + description: "/latest: Latest market ticker quotes and averages for cryptocurrencies\ + \ and exchanges. /historical: Intervals of historic market data like OHLCV\ + \ data or data for use in charting libraries. See here." symbols: - title: Symbol - type: array - items: { - "type": "string" - } - description: Cryptocurrency symbols. (only used for quotes stream) + title: "Symbol" + type: "array" + items: + type: "string" + description: "Cryptocurrency symbols. (only used for quotes stream)" minItems: 1 examples: - - AVAX - - BTC + - "AVAX" + - "BTC" supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-commercetools:0.1.0" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/commercetools" diff --git a/airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_002__AddStreamStatsColumnToSyncStats.java b/airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_002__AddStreamStatsColumnToSyncStats.java new file mode 100644 index 000000000000..aa2db496f9a8 --- /dev/null +++ b/airbyte-db/db-lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_40_17_002__AddStreamStatsColumnToSyncStats.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.db.instance.jobs.migrations; + +import org.flywaydb.core.api.migration.BaseJavaMigration; +import org.flywaydb.core.api.migration.Context; +import org.jooq.DSLContext; +import org.jooq.impl.DSL; +import org.jooq.impl.SQLDataType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +// TODO: update migration description in the class name +public class V0_40_17_002__AddStreamStatsColumnToSyncStats extends BaseJavaMigration { + + private static final Logger LOGGER = LoggerFactory.getLogger(V0_40_17_002__AddStreamStatsColumnToSyncStats.class); + + @Override + public void migrate(final Context context) throws Exception { + LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); + + // Warning: please do not use any jOOQ generated code to write a migration. + // As database schema changes, the generated jOOQ code can be deprecated. So + // old migration may not compile if there is any generated code. + final DSLContext ctx = DSL.using(context.getConnection()); + ctx.alterTable("sync_stats") + .add( + DSL.field("stream_stats", SQLDataType.JSONB.nullable(true))) + .execute(); + } + +} diff --git a/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt b/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt index aec8a73238a2..f9b451a6c670 100644 --- a/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt +++ b/airbyte-db/db-lib/src/main/resources/jobs_database/schema_dump.txt @@ -2,6 +2,16 @@ // It is also not used by any piece of code to generate anything. // It doesn't contain the enums created in the database and the default values might also be buggy. +create table "public"."airbyte_configs"( + "id" int8 generated by default as identity not null, + "config_id" varchar(36) not null, + "config_type" varchar(60) not null, + "config_blob" jsonb not null, + "created_at" timestamptz(35) not null default null, + "updated_at" timestamptz(35) not null default null, + constraint "airbyte_configs_pkey" + primary key ("id") +); create table "public"."airbyte_jobs_migrations"( "installed_rank" int4 not null, "version" varchar(50) null, @@ -77,6 +87,7 @@ create table "public"."sync_stats"( "updated_at" timestamptz(35) not null default null, "estimated_records" int8 null, "estimated_bytes" int8 null, + "stream_stats" jsonb null, constraint "sync_stats_pkey" primary key ("id") ); @@ -88,6 +99,12 @@ alter table "public"."sync_stats" add constraint "sync_stats_attempt_id_fkey" foreign key ("attempt_id") references "public"."attempts" ("id"); +create index "airbyte_configs_id_idx" on "public"."airbyte_configs"("config_id" asc); +create unique index "airbyte_configs_pkey" on "public"."airbyte_configs"("id" asc); +create unique index "airbyte_configs_type_id_idx" on "public"."airbyte_configs"( + "config_type" asc, + "config_id" asc +); create unique index "airbyte_jobs_migrations_pk" on "public"."airbyte_jobs_migrations"("installed_rank" asc); create index "airbyte_jobs_migrations_s_idx" on "public"."airbyte_jobs_migrations"("success" asc); create unique index "airbyte_metadata_pkey" on "public"."airbyte_metadata"("key" asc); diff --git a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java index aa26572a4879..8a1b415a8055 100644 --- a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java +++ 
b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java @@ -32,6 +32,7 @@ import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.JobOutput; import io.airbyte.config.NormalizationSummary; +import io.airbyte.config.StreamSyncStats; import io.airbyte.config.SyncStats; import io.airbyte.db.Database; import io.airbyte.db.ExceptionWrappingDatabase; @@ -404,7 +405,13 @@ public void writeOutput(final long jobId, final int attemptNumber, final JobOutp } @Override - public void writeSyncStats(long jobId, int attemptNumber, long estimatedRecords, long estimatedBytes, long recordsEmitted, long bytesEmitted) + public void writeSyncStats(long jobId, + int attemptNumber, + long estimatedRecords, + long estimatedBytes, + long recordsEmitted, + long bytesEmitted, + List streamStats) throws IOException { // Although the attempt table's output has a copy of the sync summary, we do not update it for // running sync stat updates. @@ -423,13 +430,13 @@ public void writeSyncStats(long jobId, int attemptNumber, long estimatedRecords, .set(SYNC_STATS.RECORDS_EMITTED, recordsEmitted) .set(SYNC_STATS.ESTIMATED_BYTES, estimatedBytes) .set(SYNC_STATS.ESTIMATED_RECORDS, estimatedRecords) + .set(SYNC_STATS.STREAM_STATS, JSONB.valueOf(Jsons.serialize(streamStats))) .set(SYNC_STATS.UPDATED_AT, now) .where(SYNC_STATS.ATTEMPT_ID.eq(attemptId)) .execute(); return null; } - // does this upsert work? ctx.insertInto(SYNC_STATS) .set(SYNC_STATS.ID, UUID.randomUUID()) .set(SYNC_STATS.UPDATED_AT, now) @@ -439,7 +446,10 @@ public void writeSyncStats(long jobId, int attemptNumber, long estimatedRecords, .set(SYNC_STATS.RECORDS_EMITTED, recordsEmitted) .set(SYNC_STATS.ESTIMATED_BYTES, estimatedBytes) .set(SYNC_STATS.ESTIMATED_RECORDS, estimatedRecords) + .set(SYNC_STATS.STREAM_STATS, JSONB.valueOf(Jsons.serialize(streamStats))) .execute(); + + // write per stream stat info return null; }); } @@ -484,16 +494,23 @@ public List getNormalizationSummary(final Long attemptId) } private static RecordMapper getSyncStatsRecordMapper() { - return record -> new SyncStats().withBytesEmitted(record.get(SYNC_STATS.BYTES_EMITTED)).withRecordsEmitted(record.get(SYNC_STATS.RECORDS_EMITTED)) - .withSourceStateMessagesEmitted(record.get(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED)) - .withDestinationStateMessagesEmitted(record.get(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED)) - .withRecordsCommitted(record.get(SYNC_STATS.RECORDS_COMMITTED)) - .withEstimatedBytes(record.get(SYNC_STATS.ESTIMATED_BYTES)) - .withEstimatedRecords(record.get(SYNC_STATS.ESTIMATED_RECORDS)) - .withMeanSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED)) - .withMaxSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED)) - .withMeanSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED)) - .withMaxSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED)); + return record -> { + try { + return new SyncStats().withBytesEmitted(record.get(SYNC_STATS.BYTES_EMITTED)).withRecordsEmitted(record.get(SYNC_STATS.RECORDS_EMITTED)) + .withSourceStateMessagesEmitted(record.get(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED)) + .withDestinationStateMessagesEmitted(record.get(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED)) + .withRecordsCommitted(record.get(SYNC_STATS.RECORDS_COMMITTED)) 
+ .withEstimatedBytes(record.get(SYNC_STATS.ESTIMATED_BYTES)) + .withEstimatedRecords(record.get(SYNC_STATS.ESTIMATED_RECORDS)) + .withMeanSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED)) + .withMaxSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED)) + .withMeanSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED)) + .withMaxSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED)) + .withStreamStats(record.get(SYNC_STATS.STREAM_STATS, String.class) == null ? null : deserializeStreamStats(record)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; } private static RecordMapper getNormalizationSummaryRecordMapper() { @@ -514,6 +531,11 @@ private static List deserializeFailureReasons(final Record record return List.of(mapper.readValue(String.valueOf(record.get(NORMALIZATION_SUMMARIES.FAILURES)), FailureReason[].class)); } + private static List deserializeStreamStats(final Record record) throws JsonProcessingException { + final ObjectMapper mapper = new ObjectMapper(); + return List.of(mapper.readValue(String.valueOf(record.get(SYNC_STATS.STREAM_STATS)), StreamSyncStats[].class)); + } + @Override public Job getJob(final long jobId) throws IOException { return jobDatabase.query(ctx -> getJob(ctx, jobId)); diff --git a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java index 3a3d67b7ae20..77fd83eec808 100644 --- a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java +++ b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/JobPersistence.java @@ -11,6 +11,7 @@ import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.JobOutput; import io.airbyte.config.NormalizationSummary; +import io.airbyte.config.StreamSyncStats; import io.airbyte.config.SyncStats; import io.airbyte.db.instance.jobs.JobsDatabaseSchema; import io.airbyte.persistence.job.models.AttemptNormalizationStatus; @@ -37,8 +38,9 @@ public interface JobPersistence { List getSyncStats(Long attemptId) throws IOException; /** - * Return the id of the record in the attempt table corresponding to that job and attempt combination. This is useful to index into other attempt-scoped - * metadata. + * Return the id of the record in the attempt table corresponding to that job and attempt + * combination. This is useful to index into other attempt-scoped metadata. 
+ * * @param jobId * @param attemptNumber * @return @@ -146,7 +148,13 @@ public interface JobPersistence { */ void writeOutput(long jobId, int attemptNumber, JobOutput output) throws IOException; - void writeSyncStats(long jobId, int attemptNumber, long estimatedRecords, long estimatedBytes, long recordsEmitted, long bytesEmitted) + void writeSyncStats(long jobId, + int attemptNumber, + long estimatedRecords, + long estimatedBytes, + long recordsEmitted, + long bytesEmitted, + List streamStats) throws IOException; /** diff --git a/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java b/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java index fa221d512e5c..d01de4a579e1 100644 --- a/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java +++ b/airbyte-persistence/job-persistence/src/test/java/io/airbyte/persistence/job/DefaultJobPersistenceTest.java @@ -316,7 +316,7 @@ class SyncStatsTest { void testWriteSyncStatsFirst() throws IOException, SQLException { final long jobId = jobPersistence.enqueueJob(SCOPE, SPEC_JOB_CONFIG).orElseThrow(); final int attemptNumber = jobPersistence.createAttempt(jobId, LOG_PATH); - jobPersistence.writeSyncStats(jobId, attemptNumber, 1000, 1000, 1000, 1000); + jobPersistence.writeSyncStats(jobId, attemptNumber, 1000, 1000, 1000, 1000, null); final Optional record = jobDatabase.query(ctx -> ctx.fetch("SELECT id from attempts where job_id = ? AND attempt_number = ?", jobId, @@ -339,7 +339,7 @@ void testWriteSyncStatsRepeated() throws IOException, SQLException { final long jobId = jobPersistence.enqueueJob(SCOPE, SPEC_JOB_CONFIG).orElseThrow(); final int attemptNumber = jobPersistence.createAttempt(jobId, LOG_PATH); - jobPersistence.writeSyncStats(jobId, attemptNumber, 1000, 1000, 1000, 1000); + jobPersistence.writeSyncStats(jobId, attemptNumber, 1000, 1000, 1000, 1000, null); final Optional record = jobDatabase.query(ctx -> ctx.fetch("SELECT id from attempts where job_id = ? AND attempt_number = ?", jobId, @@ -352,7 +352,7 @@ void testWriteSyncStatsRepeated() throws IOException, SQLException { assertEquals(1000, stat.getEstimatedBytes()); assertEquals(1000, stat.getEstimatedRecords()); - jobPersistence.writeSyncStats(jobId, attemptNumber, 2000, 2000, 2000, 2000); + jobPersistence.writeSyncStats(jobId, attemptNumber, 2000, 2000, 2000, 2000, null); var stats = jobPersistence.getSyncStats(attemptId); assertEquals(1, stats.size()); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java index 149c0c16b1c8..b4cf998da6da 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java @@ -7,8 +7,11 @@ import io.airbyte.api.model.generated.InternalOperationResult; import io.airbyte.api.model.generated.SaveStatsRequestBody; import io.airbyte.api.model.generated.SetWorkflowInAttemptRequestBody; +import io.airbyte.config.StreamSyncStats; +import io.airbyte.config.SyncStats; import io.airbyte.persistence.job.JobPersistence; import java.io.IOException; +import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,8 +41,19 @@ public InternalOperationResult saveStats(SaveStatsRequestBody requestBody) { try { // This is for the entire sync for now. 
final var stats = requestBody.getStats(); + final var streamStats = requestBody.getStreamStats().stream() + .map(s -> new StreamSyncStats() + .withStreamName(s.getStreamName()) + .withStats(new SyncStats() + .withBytesEmitted(s.getStats().getBytesEmitted()) + .withRecordsEmitted(s.getStats().getRecordsEmitted()) + .withEstimatedBytes(s.getStats().getEstimatedBytes()) + .withEstimatedRecords(s.getStats().getEstimatedRecords()))) + .collect(Collectors.toList()); + jobPersistence.writeSyncStats(requestBody.getJobId(), requestBody.getAttemptNumber(), - stats.getEstimatedRecords(), stats.getEstimatedBytes(), stats.getRecordsEmitted(), stats.getBytesEmitted()); + stats.getEstimatedRecords(), stats.getEstimatedBytes(), stats.getRecordsEmitted(), stats.getBytesEmitted(), streamStats); + } catch (IOException ioe) { LOGGER.error("IOException when setting temporal workflow in attempt;", ioe); return new InternalOperationResult().succeeded(false); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index 30890c133f2a..4ad271c6aaa4 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -36,7 +36,6 @@ import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.persistence.job.JobPersistence; -import io.airbyte.persistence.job.models.Attempt; import io.airbyte.persistence.job.models.Job; import io.airbyte.persistence.job.models.JobStatus; import io.airbyte.server.converters.JobConverter; @@ -49,7 +48,9 @@ import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class JobHistoryHandler { private static final Random RANDOM = new Random(); @@ -134,21 +135,18 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept .bytesEmitted(syncStat.getBytesEmitted()) .recordsEmitted(syncStat.getRecordsEmitted()); -// final var streamStats = attempt.getStreamStats(); -// // if this doesn't exist, mock something. 
-// -// if (streamStats != null) { -// for (final AttemptStreamStats stats : attempt.getStreamStats()) { -// if (stats.getStats() == null) { -// stats.stats(new AttemptStats()); -// } -// -// final var s = stats.getStats(); -// s.estimatedBytes(s.getBytesEmitted()); -// s.estimatedRecords(s.getRecordsEmitted()); -// } -// } - + // stream stats + if (syncStat.getStreamStats() != null) { + final var streamStats = syncStat.getStreamStats().stream().map(s -> new AttemptStreamStats() + .streamName(s.getStreamName()) + .stats(new AttemptStats() + .bytesEmitted(s.getStats().getBytesEmitted()) + .recordsEmitted(s.getStats().getRecordsEmitted()) + .estimatedBytes(s.getStats().getEstimatedBytes()) + .estimatedRecords(s.getStats().getEstimatedRecords()))) + .collect(Collectors.toList()); + a.setStreamStats(streamStats); + } } } @@ -170,7 +168,8 @@ public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) final Job job = jobPersistence.getJob(jobIdRequestBody.getId()); final JobInfoRead jobinfoRead = jobConverter.getJobInfoRead(job); - // jobConverter is pulling from the sync summary, so either we write to the sync summary, or we pull the information directly from + // jobConverter is pulling from the sync summary, so either we write to the sync summary, or we pull + // the information directly from // the table while the job is running. // if it's not running, we no longer need to do this. if (job.getStatus() == JobStatus.RUNNING) { @@ -196,16 +195,17 @@ public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) .recordsEmitted(syncStat.getRecordsEmitted()); // stream stats -// for (final AttemptStreamStats stats : a.getAttempt().getStreamStats()) { -// if (stats.getStats() == null) { -// stats.stats(new AttemptStats()); -// } -// -// final var s = stats.getStats(); -// s.estimatedBytes(s.getBytesEmitted()); -// s.estimatedRecords(s.getRecordsEmitted()); -// -// } + if (syncStat.getStreamStats() != null) { + final var streamStats = syncStat.getStreamStats().stream().map(s -> new AttemptStreamStats() + .streamName(s.getStreamName()) + .stats(new AttemptStats() + .bytesEmitted(s.getStats().getBytesEmitted()) + .recordsEmitted(s.getStats().getRecordsEmitted()) + .estimatedBytes(s.getStats().getEstimatedBytes()) + .estimatedRecords(s.getStats().getEstimatedRecords()))) + .collect(Collectors.toList()); + a.getAttempt().setStreamStats(streamStats); + } } } diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index 7429edf4d653..18ef34854447 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -11459,6 +11459,7 @@

SaveStatsRequestBody - Up

jobId           Long format: int64
attemptNumber   Integer format: int32
stats
+ streamStats (optional)
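
For reference, a sketch of what the request body might look like once streamStats is populated. The field names follow the SaveStatsRequestBody, AttemptStreamStats, and AttemptStats models used in this patch; the stream names and numbers are invented for illustration, with the per-stream values simply chosen to add up to the totals.

// Illustrative SaveStatsRequestBody payload including the new streamStats array.
const exampleSaveStatsBody = {
  jobId: 123,
  attemptNumber: 0,
  stats: {
    recordsEmitted: 1500,
    bytesEmitted: 680000,
    estimatedRecords: 3000,
    estimatedBytes: 1350000,
  },
  streamStats: [
    {
      streamName: "users",
      stats: { recordsEmitted: 1000, bytesEmitted: 450000, estimatedRecords: 2000, estimatedBytes: 900000 },
    },
    {
      streamName: "purchases",
      stats: { recordsEmitted: 500, bytesEmitted: 230000, estimatedRecords: 1000, estimatedBytes: 450000 },
    },
  ],
};
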
From 4438dd0078ab989d9e71d96ad28e2ff9530bf945 Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 18:23:12 -0700 Subject: [PATCH 19/21] formatting fixes --- .../src/components/ProgressBar/ProgressBar.tsx | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index e82f54ea5752..e946d6178196 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -14,10 +14,6 @@ function isJobsWithJobs(job: JobsWithJobs | SynchronousJobRead): job is JobsWith return (job as JobsWithJobs).attempts !== undefined; } -const formatBigNumber = (num: number) => { - return num.toLocaleString(); -}; - const formatBytes = (bytes?: number) => { if (!bytes) { return ; @@ -39,7 +35,7 @@ export const ProgressBar = ({ job: JobsWithJobs | SynchronousJobRead; jobConfigType: JobConfigType; }) => { - const { formatMessage } = useIntl(); + const { formatMessage, formatNumber } = useIntl(); if (jobConfigType !== "sync") { return null; @@ -120,8 +116,7 @@ export const ProgressBar = ({ // chose to estimate time remaining based on records rather than bytes if (latestAttempt && latestAttempt.status === Status.RUNNING) { - const now = new Date().getTime(); - elapsedTimeMS = now - latestAttempt.createdAt * 1000; + elapsedTimeMS = new Date().getTime() - latestAttempt.createdAt * 1000; timeRemaining = Math.floor(elapsedTimeMS / totalPercentRecords) * (100 - totalPercentRecords); // in ms const minutesRemaining = Math.ceil(timeRemaining / 1000 / 60); const hoursRemaining = Math.ceil(minutesRemaining / 60); @@ -132,8 +127,6 @@ export const ProgressBar = ({ } } - console.log({ unEstimatedStreams, numeratorRecords, denominatorRecords, totalPercentRecords, timeRemainingString }); - return (
{unEstimatedStreams.length === 0 && } @@ -152,8 +145,8 @@ export const ProgressBar = ({ {denominatorRecords > 0 && ( <>
- {formatBigNumber(numeratorRecords)}{" "} - {unEstimatedStreams.length > 0 ? "" : `/ ${formatBigNumber(denominatorRecords)}`}{" "} + {formatNumber(numeratorRecords)}{" "} + {unEstimatedStreams.length > 0 ? "" : `/ ${formatNumber(denominatorRecords)}`}{" "} {formatMessage({ id: "estimate.recordsSynced" })} @{" "} {Math.round((numeratorRecords / elapsedTimeMS) * 1000)}{" "} {formatMessage({ id: "estimate.recordsPerSecond" })} @@ -188,7 +181,7 @@ export const ProgressBar = ({ {localNumerator && localDenominator ? `${Math.round((localNumerator * 100) / localDenominator)}${formatMessage({ id: "estimate.percentComplete", - })} (${formatBigNumber(localNumerator)} / ${formatBigNumber(localDenominator)} ${formatMessage({ + })} (${formatNumber(localNumerator)} / ${formatNumber(localDenominator)} ${formatMessage({ id: "estimate.recordsSynced", })})` : `${localNumerator} ${formatMessage({ id: "estimate.recordsSyncedThusFar" })} (no estimate)`} From b7ed66fb232f6fd8b3ba3631335d47807533e04a Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 18:44:37 -0700 Subject: [PATCH 20/21] better source estimate --- .../connectors/source-faker/source_faker/source.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-faker/source_faker/source.py b/airbyte-integrations/connectors/source-faker/source_faker/source.py index b0925820a337..f4d6f3125c7a 100644 --- a/airbyte-integrations/connectors/source-faker/source_faker/source.py +++ b/airbyte-integrations/connectors/source-faker/source_faker/source.py @@ -140,7 +140,7 @@ def read( users_estimate = count - cursor yield generate_estimate(stream.stream.name, users_estimate, 450) - yield generate_estimate("Purchases", users_estimate * 2, 230) # a fuzzy guess, some users have purchases, some don't + yield generate_estimate("Purchases", users_estimate * 1.5, 230) # a fuzzy guess, some users have purchases, some don't for i in range(cursor, count): user = generate_user(person, dt, i) @@ -208,7 +208,7 @@ def generate_estimate(stream_name: str, total: int, bytes_per_row: int): "trace": { "emitted_at": int(datetime.datetime.now().timestamp() * 1000), "type": "ESTIMATE", - "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": total, "byte_estimate": total * bytes_per_row}, + "estimate": {"type": "STREAM", "name": stream_name, "namespace": "", "row_estimate": round(total), "byte_estimate": round(total * bytes_per_row)}, }, } From 1445e7fd4741e41c09f6a611d8556b163012338d Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 1 Nov 2022 18:44:47 -0700 Subject: [PATCH 21/21] show/hide streams --- .../components/ProgressBar/ProgressBar.tsx | 57 ++++++++++++++++++- airbyte-webapp/src/locales/en.json | 5 +- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx index e946d6178196..78f7f40f39e0 100644 --- a/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx +++ b/airbyte-webapp/src/components/ProgressBar/ProgressBar.tsx @@ -1,5 +1,6 @@ import classNames from "classnames"; import { Line } from "rc-progress"; +import { useState } from "react"; import { useIntl, FormattedMessage } from "react-intl"; import { getJobStatus } from "components/JobItem/JobItem"; @@ -28,6 +29,17 @@ const formatBytes = (bytes?: number) => { return ; }; +const buttonUnStyle = { + background: "none", + color: "inherit", + border: "none", + padding: 0, + font: "inherit", + cursor: 
"pointer", + outline: "inherit", + textDecoration: "underline", +}; + export const ProgressBar = ({ job, jobConfigType, @@ -36,6 +48,7 @@ export const ProgressBar = ({ jobConfigType: JobConfigType; }) => { const { formatMessage, formatNumber } = useIntl(); + const [showStreams, setShowStreams] = useState(false); if (jobConfigType !== "sync") { return null; @@ -167,16 +180,56 @@ export const ProgressBar = ({ )} - {latestAttempt.streamStats && ( + {latestAttempt.streamStats && !showStreams && (

-
Stream Stats:
+ +
+
+ )} + + {latestAttempt.streamStats && showStreams && ( +
+
+
+ {formatMessage({ + id: "estimate.streamStats", + })}{" "} + ( + + ): +
{latestAttempt.streamStats?.map((stream, idx) => { const localNumerator = stream.stats.recordsEmitted; const localDenominator = stream.stats.estimatedRecords; return (
+ {" - "} {stream.streamName} -{" "} {localNumerator && localDenominator ? `${Math.round((localNumerator * 100) / localDenominator)}${formatMessage({ diff --git a/airbyte-webapp/src/locales/en.json b/airbyte-webapp/src/locales/en.json index fef82700e355..3b67de40516d 100644 --- a/airbyte-webapp/src/locales/en.json +++ b/airbyte-webapp/src/locales/en.json @@ -557,8 +557,11 @@ "estimate.bytesSynced": "synced", "estimate.bytesPerSecond": "/sec", "estimate.unEstimatedStreams": "un-estimated streams", - "estimate.recordsSyncedThusFar": "records synced thus far", + "estimate.recordsSyncedThusFar": "records synced", + "estimate.viewStreamStats": "view stream stats", "estimate.percentComplete": "% complete", + "estimate.streamStats": "Stream Stats", + "estimate.hide": "hide", "frequency.manual": "Manual", "frequency.cron": "Cron",