diff --git a/workflow/process_dicom-seg_generated.py b/workflow/process_dicom-seg_generated.py index e468792..a3901bd 100644 --- a/workflow/process_dicom-seg_generated.py +++ b/workflow/process_dicom-seg_generated.py @@ -9,6 +9,7 @@ from dbx.pixels import Catalog from dbx.pixels.dicom import DicomMetaExtractor +from pyspark.sql.functions import expr catalog = Catalog(spark, table=table, volume=volume) catalog_df = catalog.catalog(path=path, streaming=True, streamCheckpointBasePath=f"{catalog._volume_path}/checkpoints/monai_label_segm/") @@ -16,6 +17,10 @@ catalog_df = spark.readStream.table(table+"_autoseg_result").selectExpr("concat('dbfs:', nullif(result, '')) as path").where('path is not null') catalog_df = Catalog._with_path_meta(catalog_df) -meta_df = DicomMetaExtractor(catalog).transform(catalog_df) +meta_df = DicomMetaExtractor(catalog, deep=False).transform(catalog_df) +meta_df = meta_df\ + .withColumn("modificationTime", expr("to_timestamp(unix_timestamp(concat(meta:['00080023'].Value[0], meta:['00080033'].Value[0]), 'yyyyMMddHHmmss'))"))\ + .withColumn("length", expr("meta:['file_size']").cast("bigint")) + catalog.save(meta_df, mode="append")