From dffc55d1b8b7e4935e77a1db6ac7a1dd74198a6c Mon Sep 17 00:00:00 2001 From: Martin Collignon <2604526+martincollignon@users.noreply.github.com> Date: Sun, 1 Dec 2024 14:47:58 +0100 Subject: [PATCH] fix: correct Dataflow script issues - Add proper function definitions - Fix value provider access with .get() - Ensure correct scoping of functions --- backend/dataflow/validate_geometries.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/dataflow/validate_geometries.py b/backend/dataflow/validate_geometries.py index d8d227ca..d872a622 100644 --- a/backend/dataflow/validate_geometries.py +++ b/backend/dataflow/validate_geometries.py @@ -94,16 +94,16 @@ def process(self, element): yield {'dataset': dataset, 'data': gdf, 'stats': stats} -def run(argv=None): - pipeline_options = ValidateGeometriesOptions(argv) +def run(): + pipeline_options = PipelineOptions() options = pipeline_options.view_as(ValidateGeometriesOptions) - + with beam.Pipeline(options=pipeline_options) as p: (p - | 'Create Dataset' >> beam.Create([options.dataset]) - | 'Read Data' >> beam.Map(lambda dataset: read_dataset(dataset, options.input_bucket)) + | 'Create Dataset' >> beam.Create([options.dataset.get()]) + | 'Read Data' >> beam.Map(lambda dataset: read_dataset(dataset, options.input_bucket.get())) | 'Validate and Optimize' >> beam.ParDo(ValidateAndOptimize()) - | 'Write Results' >> beam.Map(lambda element: write_outputs(element, options.output_bucket)) + | 'Write Results' >> beam.Map(lambda element: write_outputs(element, options.output_bucket.get())) ) if __name__ == '__main__':