[DO NOT MERGE] Run all PostCommit and PreCommit Tests against Release Branch #33870
8 fail, 21 skipped, 28 pass in 54m 4s
Annotations
Check warning on line 0 in apache_beam.pipeline_test.DoFnTest
github-actions / Test Results
test_element_param (apache_beam.pipeline_test.DoFnTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 18m 25s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.pipeline_test.DoFnTest testMethod=test_element_param>
    @pytest.mark.it_validatesrunner
    def test_element_param(self):
      pipeline = TestPipeline()
      input = [1, 2]
      pcoll = (
          pipeline
          | 'Create' >> Create(input)
          | 'Ele param' >> Map(lambda element=DoFn.ElementParam: element))
      assert_that(pcoll, equal_to(input))
>     pipeline.run()
apache_beam/pipeline_test.py:742:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:594: in run
self._options).run(False)
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181601034383-5743'
createTime: '2025-02-05T18:16:02.497169Z'
...025-02-05T18:16:02.497169Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7ed93b25ef30>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
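Note: the job repr above shows steps: [], i.e. the workflow failed before any steps were recorded, which points at the Dataflow service side rather than the test body. A minimal local repro sketch (assuming a standard Beam install; it mirrors the test above but runs on the in-process DirectRunner, so it exercises only the SDK path):

import apache_beam as beam
from apache_beam import Create, DoFn, Map
from apache_beam.testing.util import assert_that, equal_to

# DirectRunner is the default runner, so no Dataflow job is launched.
with beam.Pipeline() as pipeline:
  expected = [1, 2]
  pcoll = (
      pipeline
      | 'Create' >> Create(expected)
      # A DoFn.ElementParam default argument asks the runner to bind the
      # current element to that parameter at execution time.
      | 'Ele param' >> Map(lambda element=DoFn.ElementParam: element))
  assert_that(pcoll, equal_to(expected))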
Check warning on line 0 in apache_beam.transforms.combinefn_lifecycle_test.CombineFnLifecycleTest
github-actions / Test Results
test_combine (apache_beam.transforms.combinefn_lifecycle_test.CombineFnLifecycleTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 16m 19s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.transforms.combinefn_lifecycle_test.CombineFnLifecycleTest testMethod=test_combine>
    def test_combine(self):
>     run_combine(self.pipeline)
apache_beam/transforms/combinefn_lifecycle_test.py:44:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/transforms/combinefn_lifecycle_pipeline.py:107: in run_combine
with pipeline as p:
apache_beam/pipeline.py:644: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181559924096-1675'
createTime: '2025-02-05T18:16:01.464374Z'
...025-02-05T18:16:01.464374Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x787a93c56d80>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
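For reference, a minimal sketch of the CombineFn lifecycle this test exercises (an illustration assuming a standard Beam install, not the test's own run_combine pipeline; SumFn is a hypothetical name). The lifecycle tests check that hooks like setup and teardown fire correctly around the core combine methods:

import apache_beam as beam
from apache_beam.testing.util import assert_that, equal_to

class SumFn(beam.CombineFn):
  def setup(self):
    # Lifecycle hook: called once per CombineFn instance before first use.
    pass

  def create_accumulator(self):
    return 0

  def add_input(self, accumulator, element):
    return accumulator + element

  def merge_accumulators(self, accumulators):
    return sum(accumulators)

  def extract_output(self, accumulator):
    return accumulator

  def teardown(self):
    # Lifecycle hook: called once when the instance is retired.
    pass

with beam.Pipeline() as p:
  total = p | beam.Create([1, 2, 3]) | beam.CombineGlobally(SumFn())
  assert_that(total, equal_to([6]))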
Check warning on line 0 in apache_beam.transforms.combinefn_lifecycle_test.CombineFnLifecycleTest
github-actions / Test Results
test_non_liftable_combine (apache_beam.transforms.combinefn_lifecycle_test.CombineFnLifecycleTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 14m 54s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.transforms.combinefn_lifecycle_test.CombineFnLifecycleTest testMethod=test_non_liftable_combine>
    def test_non_liftable_combine(self):
>     run_combine(self.pipeline, lift_combiners=False)
apache_beam/transforms/combinefn_lifecycle_test.py:47:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/transforms/combinefn_lifecycle_pipeline.py:107: in run_combine
with pipeline as p:
apache_beam/pipeline.py:644: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181600284356-1675'
createTime: '2025-02-05T18:16:01.703017Z'
...025-02-05T18:16:01.703017Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7dc20cbdc920>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
Check warning on line 0 in apache_beam.transforms.dofn_lifecycle_test.DoFnLifecycleTest
github-actions / Test Results
test_dofn_lifecycle (apache_beam.transforms.dofn_lifecycle_test.DoFnLifecycleTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 16m 4s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.transforms.dofn_lifecycle_test.DoFnLifecycleTest testMethod=test_dofn_lifecycle>
    def test_dofn_lifecycle(self):
>     with TestPipeline() as p:
apache_beam/transforms/dofn_lifecycle_test.py:80:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/pipeline.py:644: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:594: in run
self._options).run(False)
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181600519491-5743'
createTime: '2025-02-05T18:16:01.997262Z'
...025-02-05T18:16:01.997262Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7beb5ec85b50>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
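For reference, a sketch of the DoFn lifecycle hooks that dofn_lifecycle_test validates (assuming a standard Beam install; LifecycleDoFn is an illustrative name, and the sketch runs on the DirectRunner):

import apache_beam as beam

class LifecycleDoFn(beam.DoFn):
  def setup(self):
    # Called once per DoFn instance, before any bundle is processed.
    self.resource = object()

  def start_bundle(self):
    # Called once at the start of each bundle.
    pass

  def process(self, element):
    yield element

  def finish_bundle(self):
    # Called once at the end of each bundle.
    pass

  def teardown(self):
    # Called once per instance when it is retired (best effort).
    pass

with beam.Pipeline() as p:
  _ = p | beam.Create([1, 2, 3]) | beam.ParDo(LifecycleDoFn())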
Check warning on line 0 in apache_beam.metrics.metric_test.MetricsTest
github-actions / Test Results
test_user_counter_using_pardo (apache_beam.metrics.metric_test.MetricsTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 18m 40s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.metrics.metric_test.MetricsTest testMethod=test_user_counter_using_pardo>
    @pytest.mark.it_validatesrunner
    def test_user_counter_using_pardo(self):
      class SomeDoFn(beam.DoFn):
        """A custom dummy DoFn using yield."""
        static_counter_elements = metrics.Metrics.counter(
            "SomeDoFn", 'metrics_static_counter_element')

        def __init__(self):
          self.user_counter_elements = metrics.Metrics.counter(
              self.__class__, 'metrics_user_counter_element')

        def process(self, element):
          self.static_counter_elements.inc(2)
          self.user_counter_elements.inc()
          distro = Metrics.distribution(self.__class__, 'element_dist')
          distro.update(element)
          yield element

      pipeline = TestPipeline()
      nums = pipeline | 'Input' >> beam.Create([1, 2, 3, 4])
      results = nums | 'ApplyPardo' >> beam.ParDo(SomeDoFn())
      assert_that(results, equal_to([1, 2, 3, 4]))
>     res = pipeline.run()
apache_beam/metrics/metric_test.py:177:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:594: in run
self._options).run(False)
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181600857749-5743'
createTime: '2025-02-05T18:16:02.760410Z'
...025-02-05T18:16:02.760410Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7c6b4124a030>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
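For reference, a sketch of how user counters like the ones above are read back from a PipelineResult after a successful run (assuming a standard Beam install; CountFn and the 'elements' counter name are illustrative, not from the test). A failed run like this one never reaches the query step:

import apache_beam as beam
from apache_beam.metrics.metric import Metrics, MetricsFilter

class CountFn(beam.DoFn):
  def __init__(self):
    self.counter = Metrics.counter(self.__class__, 'elements')

  def process(self, element):
    self.counter.inc()
    yield element

pipeline = beam.Pipeline()
_ = pipeline | beam.Create([1, 2, 3, 4]) | beam.ParDo(CountFn())
result = pipeline.run()
result.wait_until_finish()

# Query the counter back from the result by metric name.
metrics = result.metrics().query(MetricsFilter().with_name('elements'))
for counter in metrics['counters']:
  print(counter.key.metric.name, counter.committed)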
Check warning on line 0 in apache_beam.runners.portability.fn_api_runner.translations_test.TranslationsTest
github-actions / Test Results
test_run_packable_combine_limit (apache_beam.runners.portability.fn_api_runner.translations_test.TranslationsTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 16m 0s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.runners.portability.fn_api_runner.translations_test.TranslationsTest testMethod=test_run_packable_combine_limit>
    @pytest.mark.it_validatesrunner
    def test_run_packable_combine_limit(self):
      class MultipleLargeCombines(beam.PTransform):
        def annotations(self):
          # Limit to at most 2 combiners per packed combiner.
          return {python_urns.APPLY_COMBINER_PACKING: b'2'}

        def expand(self, pcoll):
          assert_that(
              pcoll | 'min-1-globally' >> core.CombineGlobally(min),
              equal_to([-1]),
              label='assert-min-1-globally')
          assert_that(
              pcoll | 'min-2-globally' >> core.CombineGlobally(min),
              equal_to([-1]),
              label='assert-min-2-globally')
          assert_that(
              pcoll | 'min-3-globally' >> core.CombineGlobally(min),
              equal_to([-1]),
              label='assert-min-3-globally')

      class MultipleSmallCombines(beam.PTransform):
        def annotations(self):
          # Limit to at most 4 combiners per packed combiner.
          return {python_urns.APPLY_COMBINER_PACKING: b'4'}

        def expand(self, pcoll):
          assert_that(
              pcoll | 'min-4-globally' >> core.CombineGlobally(min),
              equal_to([-1]),
              label='assert-min-4-globally')
          assert_that(
              pcoll | 'min-5-globally' >> core.CombineGlobally(min),
              equal_to([-1]),
              label='assert-min-5-globally')
>     with TestPipeline() as pipeline:
apache_beam/runners/portability/fn_api_runner/translations_test.py:346:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/pipeline.py:644: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:594: in run
self._options).run(False)
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181605262494-5743'
createTime: '2025-02-05T18:16:07.644561Z'
...025-02-05T18:16:07.644561Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7a136b1e7500>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
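For reference, a standalone sketch of the combiner-packing annotation both translations tests exercise (assuming a standard Beam install; LimitedCombines and its stage names are illustrative). Whether packing actually occurs depends on the runner enabling the pack_combiners translation, as the next test's inline comments note:

import apache_beam as beam
from apache_beam.portability import python_urns
from apache_beam.transforms import core

class LimitedCombines(beam.PTransform):
  def annotations(self):
    # b'2' caps packing at two combiners per packed stage; b'' means no limit.
    return {python_urns.APPLY_COMBINER_PACKING: b'2'}

  def expand(self, pcoll):
    # Sibling CombineGlobally stages over the same input are candidates
    # for packing into a single combiner stage.
    return (
        pcoll | 'min-globally' >> core.CombineGlobally(min),
        pcoll | 'max-globally' >> core.CombineGlobally(max))

with beam.Pipeline() as p:
  _ = p | beam.Create([3, 1, 2]) | LimitedCombines()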
Check warning on line 0 in apache_beam.runners.portability.fn_api_runner.translations_test.TranslationsTest
github-actions / Test Results
test_run_packable_combine_globally (apache_beam.runners.portability.fn_api_runner.translations_test.TranslationsTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 17m 2s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.runners.portability.fn_api_runner.translations_test.TranslationsTest testMethod=test_run_packable_combine_globally>
    @pytest.mark.it_validatesrunner
    def test_run_packable_combine_globally(self):
      class MultipleCombines(beam.PTransform):
        def annotations(self):
          return {python_urns.APPLY_COMBINER_PACKING: b''}

        def expand(self, pcoll):
          # These CombineGlobally stages will be packed if and only if
          # translations.eliminate_common_key_with_void and
          # translations.pack_combiners are enabled in the TestPipeline runner.
          assert_that(
              pcoll | 'min-globally' >> core.CombineGlobally(min),
              equal_to([-1]),
              label='assert-min-globally')
          assert_that(
              pcoll | 'count-globally' >> combiners.Count.Globally(),
              equal_to([10]),
              label='assert-count-globally')
          assert_that(
              pcoll
              | 'largest-globally' >> combiners.Top.Largest(2),
              equal_to([[9, 6]]),
              label='assert-largest-globally')
>     with TestPipeline() as pipeline:
apache_beam/runners/portability/fn_api_runner/translations_test.py:306:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/pipeline.py:644: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:594: in run
self._options).run(False)
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181603521748-7650'
createTime: '2025-02-05T18:16:04.908160Z'
...025-02-05T18:16:04.908160Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x79b68c70de20>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
Check warning on line 0 in apache_beam.transforms.ptransform_test.PTransformTest
github-actions / Test Results
test_flatten_one_single_pcollection (apache_beam.transforms.ptransform_test.PTransformTest) failed
sdks/python/pytest_validatesRunnerBatchTests-df-py312.xml [took 18m 34s]
Raw output
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Workflow failed.
self = <apache_beam.transforms.ptransform_test.PTransformTest testMethod=test_flatten_one_single_pcollection>
    @pytest.mark.it_validatesrunner
    def test_flatten_one_single_pcollection(self):
>     with TestPipeline() as pipeline:
apache_beam/transforms/ptransform_test.py:729:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/pipeline.py:644: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:115: in run
result = super().run(
apache_beam/pipeline.py:594: in run
self._options).run(False)
apache_beam/pipeline.py:618: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:66: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20250205181603222263-5743'
createTime: '2025-02-05T18:16:04.615434Z'
...025-02-05T18:16:04.615434Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7891a75afc50>
duration = None
    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
        consoleUrl = (
            "Console URL: https://console.cloud.google.com/"
            f"dataflow/jobs/<RegionId>/{self.job_id()}"
            "?project=<ProjectId>")
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach a terminal state after waiting indefinitely. '
            '{}'.format(consoleUrl))
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # the resolution of the issue.
          _LOGGER.error(consoleUrl)
>         raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
E Workflow failed.
apache_beam/runners/dataflow/dataflow_runner.py:807: DataflowRuntimeException
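For reference, a minimal local repro sketch of this last failing case (assuming a standard Beam install; it runs on the DirectRunner and exercises Flatten over a tuple holding a single PCollection):

import apache_beam as beam
from apache_beam.testing.util import assert_that, equal_to

with beam.Pipeline() as pipeline:
  source = pipeline | 'Create' >> beam.Create([1, 2, 3])
  # Flatten must accept a single-PCollection tuple and pass its elements
  # through unchanged.
  flattened = (source,) | 'Flatten' >> beam.Flatten()
  assert_that(flattened, equal_to([1, 2, 3]))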