diff --git a/disdrodb/L0/L0B_processing.py b/disdrodb/L0/L0B_processing.py index 8e69e14a..bb66489f 100644 --- a/disdrodb/L0/L0B_processing.py +++ b/disdrodb/L0/L0B_processing.py @@ -46,6 +46,7 @@ get_velocity_bin_width, get_raw_field_nbins, get_L0B_encodings_dict, + get_time_encoding, ) logger = logging.getLogger(__name__) @@ -137,9 +138,9 @@ def reshape_raw_spectrum_to_2D(arr, n_bins_dict, n_timesteps): return arr -def retrieve_L1_raw_arrays(df, sensor_name, lazy=True, verbose=False): +def retrieve_L0B_arrays(df, sensor_name, lazy=True, verbose=False): # Log - msg = " - Retrieval of L1 data matrix started." + msg = " - Retrieval of L0B data matrix started." if verbose: print(msg) logger.info(msg) @@ -246,6 +247,13 @@ def get_coords(sensor_name): return coords +def convert_object_variables_to_string(ds): + for var in ds.data_vars: + if pd.api.types.is_object_dtype(ds[var]): + ds[var] = ds[var].astype(str) + return ds + + def create_L0B_from_L0A(df, attrs, lazy=True, verbose=False): # Retrieve sensor name sensor_name = attrs["sensor_name"] @@ -262,7 +270,7 @@ def create_L0B_from_L0A(df, attrs, lazy=True, verbose=False): df, sensor_name=sensor_name, lazy=lazy, verbose=verbose ) # Retrieve raw data matrices - dict_data = retrieve_L1_raw_arrays(df, sensor_name, lazy=lazy, verbose=verbose) + dict_data = retrieve_L0B_arrays(df, sensor_name, lazy=lazy, verbose=verbose) # Define raw data matrix variables for xarray Dataset data_vars = { "raw_drop_concentration": ( @@ -347,7 +355,10 @@ def create_L0B_from_L0A(df, attrs, lazy=True, verbose=False): msg = f"Error in the creation of L1 xarray Dataset. The error is: \n {e}" logger.error(msg) raise ValueError(msg) - + + # Ensure variables with dtype object are converted to string + ds = convert_object_variables_to_string(ds) + # ----------------------------------------------------------- # Check L0B standards check_L0B_standards(ds) @@ -376,28 +387,43 @@ def sanitize_encodings_dict(encoding_dict, ds): def rechunk_dataset(ds, encoding_dict): for var in ds.data_vars: - chunks = encoding_dict[var]["chunksizes"] + chunks = encoding_dict[var].pop("chunksizes") if chunks is not None: ds[var] = ds[var].chunk(chunks) return ds -def write_L0B(ds, fpath, sensor_name): - # Ensure directory exist - os.makedirs(os.path.dirname(fpath), exist_ok=True) - +def set_encodings(ds, sensor_name): # Get encoding dictionary encoding_dict = get_L0B_encodings_dict(sensor_name) encoding_dict = {k: encoding_dict[k] for k in ds.data_vars} - - # Ensure chunksize smaller than the array shape) + + # Ensure chunksize smaller than the array shape encoding_dict = sanitize_encodings_dict(encoding_dict, ds) - + # Rechunk variables for fast writing ! + # - This pop the chunksize argument from the encoding dict ! ds = rechunk_dataset(ds, encoding_dict) + + # Set time encoding + ds['time'].encoding.update(get_time_encoding()) + + # Set the variable encodings + for var in ds.data_vars: + ds[var].encoding.update(encoding_dict[var]) + + return ds + + +def write_L0B(ds, fpath, sensor_name): + # Ensure directory exist + os.makedirs(os.path.dirname(fpath), exist_ok=True) + # Set encodings + ds = set_encodings(ds, sensor_name) + # Write netcdf - ds.to_netcdf(fpath, engine="netcdf4", encoding=encoding_dict) + ds.to_netcdf(fpath, engine="netcdf4") # , encoding=encoding_dict) ####--------------------------------------------------------------------------. diff --git a/disdrodb/L0/configs/OTT_Parsivel/L0A_encodings.yml b/disdrodb/L0/configs/OTT_Parsivel/L0A_encodings.yml index 06813fcf..46d5e636 100644 --- a/disdrodb/L0/configs/OTT_Parsivel/L0A_encodings.yml +++ b/disdrodb/L0/configs/OTT_Parsivel/L0A_encodings.yml @@ -2,31 +2,31 @@ rainfall_rate_32bit: 'float32' rainfall_accumulated_32bit: 'float32' weather_code_synop_4680: 'uint32' weather_code_synop_4677: 'uint32' -weather_code_metar_4678: 'object' -weather_code_nws: 'object' +weather_code_metar_4678: 'str' +weather_code_nws: 'str' reflectivity_32bit: 'float32' mor_visibility: 'uint16' sample_interval: 'uint16' laser_amplitude: 'uint32' number_particles: 'uint32' sensor_temperature: 'int8' -sensor_serial_number: 'object' -firmware_iop: 'object' -firmware_dsp: 'object' +sensor_serial_number: 'str' +firmware_iop: 'str' +firmware_dsp: 'str' sensor_heating_current: 'float32' sensor_battery_voltage: 'float32' sensor_status: 'uint8' -start_time: 'object' -sensor_time: 'object' -sensor_date: 'object' -station_name: 'object' -station_number: 'object' +start_time: 'str' +sensor_time: 'str' +sensor_date: 'str' +station_name: 'str' +station_number: 'str' rainfall_amount_absolute_32bit: 'float32' error_code: 'uint8' rainfall_rate_16bit: 'float32' rainfall_rate_12bit: 'float32' rainfall_accumulated_16bit: 'float32' reflectivity_16bit: 'float32' -raw_drop_concentration: 'object' -raw_drop_average_velocity: 'object' -raw_drop_number: 'object' +raw_drop_concentration: 'str' +raw_drop_average_velocity: 'str' +raw_drop_number: 'str' diff --git a/disdrodb/L0/configs/OTT_Parsivel2/L0A_encodings.yml b/disdrodb/L0/configs/OTT_Parsivel2/L0A_encodings.yml index d5694586..548d87fb 100644 --- a/disdrodb/L0/configs/OTT_Parsivel2/L0A_encodings.yml +++ b/disdrodb/L0/configs/OTT_Parsivel2/L0A_encodings.yml @@ -2,25 +2,25 @@ rainfall_rate_32bit: 'float32' rainfall_accumulated_32bit: 'float32' weather_code_synop_4680: 'uint32' weather_code_synop_4677: 'uint32' -weather_code_metar_4678: 'object' -weather_code_nws: 'object' +weather_code_metar_4678: 'str' +weather_code_nws: 'str' reflectivity_32bit: 'float32' mor_visibility: 'uint16' sample_interval: 'uint16' laser_amplitude: 'uint32' number_particles: 'uint32' sensor_temperature: 'int8' -sensor_serial_number: 'object' -firmware_iop: 'object' -firmware_dsp: 'object' +sensor_serial_number: 'str' +firmware_iop: 'str' +firmware_dsp: 'str' sensor_heating_current: 'float32' sensor_battery_voltage: 'float32' sensor_status: 'uint8' -start_time: 'object' -sensor_time: 'object' -sensor_date: 'object' -station_name: 'object' -station_number: 'object' +start_time: 'str' +sensor_time: 'str' +sensor_date: 'str' +station_name: 'str' +station_number: 'str' rainfall_amount_absolute_32bit: 'float32' error_code: 'uint8' sensor_temperature_pcb: 'int8' @@ -33,7 +33,7 @@ reflectivity_16bit: 'float32' rain_kinetic_energy: 'float32' snowfall_rate: 'float32' number_particles_all: 'uint32' -list_particles: 'object' -raw_drop_concentration: 'object' -raw_drop_average_velocity: 'object' -raw_drop_number: 'object' \ No newline at end of file +list_particles: 'str' +raw_drop_concentration: 'str' +raw_drop_average_velocity: 'str' +raw_drop_number: 'str' \ No newline at end of file diff --git a/disdrodb/L0/configs/Thies_LPM/L0A_encodings.yml b/disdrodb/L0/configs/Thies_LPM/L0A_encodings.yml index d60d11fe..0c1a27e8 100644 --- a/disdrodb/L0/configs/Thies_LPM/L0A_encodings.yml +++ b/disdrodb/L0/configs/Thies_LPM/L0A_encodings.yml @@ -2,15 +2,15 @@ start_identifier: 'uint8' device_address: 'uint8' sensor_serial_number: 'uint16' software_version: 'float32' -date_sensor: 'object' -time_sensor: 'object' +date_sensor: 'str' +time_sensor: 'str' weather_code_synop_4677_5min: 'uint8' weather_code_synop_4680_5min : 'uint8' -weather_code_metar_4678_5min : 'object' +weather_code_metar_4678_5min : 'str' precipitation_rate_5min : 'float32' weather_code_synop_4677: 'uint8' weather_code_synop_4680: 'uint8' -weather_code_metar_4678: 'object' +weather_code_metar_4678: 'str' precipitation_rate: 'float32' rainfall_rate: 'float32' snowfall_rate: 'float32' @@ -40,14 +40,14 @@ laser_temperature: 'uint16' laser_current_average: 'uint16' control_voltage: 'float32' optical_control_voltage_output: 'float32' -sensor_voltage_supply : 'object' -current_heating_pane_transmitter_head : 'object' -current_heating_pane_receiver_head: 'object' +sensor_voltage_supply : 'str' +current_heating_pane_transmitter_head : 'str' +current_heating_pane_receiver_head: 'str' temperature_ambient: 'float32' -current_heating_voltage_supply: 'object' -current_heating_house: 'object' -current_heating_heads: 'object' -current_heating_carriers: 'object' +current_heating_voltage_supply: 'str' +current_heating_house: 'str' +current_heating_heads: 'str' +current_heating_carriers: 'str' number_particles: 'uint16' number_particles_internal_data: 'float32' number_particles_min_speed: 'uint16' @@ -78,7 +78,7 @@ number_particles_class_8: 'uint16' number_particles_class_8_internal_data: 'float32' number_particles_class_9: 'uint16' number_particles_class_9_internal_data: 'float32' -raw_drop_number: 'object' +raw_drop_number: 'str' # '521': air_temperature # '522': relative_humidity # '523': wind_speed diff --git a/disdrodb/L0/readers/NCAR/reader_VORTEX2_2010.py b/disdrodb/L0/readers/NCAR/reader_VORTEX2_2010.py index e35b299a..488d886d 100644 --- a/disdrodb/L0/readers/NCAR/reader_VORTEX2_2010.py +++ b/disdrodb/L0/readers/NCAR/reader_VORTEX2_2010.py @@ -296,7 +296,7 @@ def df_sanitizer_fun(df, lazy=False): df["time"] = dd.to_datetime( df["sensor_date"] + "-" + df["sensor_time"], format="%d.%m.%Y-%H:%M:%S" ) - df = df.drop(columns=["sensor_date", "sensor_time"]) + df = df.drop(columns=["sensor_date", "sensor_time", "start_time"]) # Reformat weather codes if "weather_code_metar_4678" in df.columns: diff --git a/disdrodb/L0/standards.py b/disdrodb/L0/standards.py index 24e8fe99..c1f29d3b 100644 --- a/disdrodb/L0/standards.py +++ b/disdrodb/L0/standards.py @@ -30,14 +30,13 @@ PRODUCT_VERSION = "V0" SOFTWARE_VERSION = "V0" +EPOCH = u"seconds since 1970-01-01 00:00:00" -# TODO: -# - get_L0_dtype, -# - get_L1_netcdfencoding_dict --> get_encodings(sensor_name) -# - _get_encodings_key -# - get_encodings_dtype -# - get_encodings_chunk -# - get_encodings_ .... + +# Notes: +# - L0A_encodings currently specify only the dtype. This could be expanded in the future. +# - disdrodb.configs ... the netcdf chunk size could be an option to be specified + def read_config_yml(sensor_name, filename): """Read a config yaml file and return the dictionary.""" @@ -94,7 +93,8 @@ def get_data_format_dict(sensor_name): def get_long_name_dict(sensor_name): """Get a dictionary containing the long name of each sensor variable.""" return read_config_yml(sensor_name=sensor_name, filename="variable_longname.yml") - + + def get_units_dict(sensor_name): """Get a dictionary containing the unit of each sensor variable.""" return read_config_yml(sensor_name=sensor_name, filename="variable_units.yml") @@ -120,17 +120,19 @@ def get_velocity_bins_dict(sensor_name): def get_L0A_dtype(sensor_name): - """Get a dictionary containing the L0 dtype.""" + """Get a dictionary containing the L0A dtype.""" # Note: This function could extract the info from get_L0A_encodings_dict in future. d = read_config_yml(sensor_name=sensor_name, filename="L0A_encodings.yml") return d + def get_L0A_encodings_dict(sensor_name): - """Get a dictionary containing the L0 dtype.""" - # L0A_encodings currently specify only the dtype ... could be expanded in future. + """Get a dictionary containing the L0A encodings.""" + # - L0A_encodings currently specify only the dtype. This could be expanded in the future. d = read_config_yml(sensor_name=sensor_name, filename="L0A_encodings.yml") return d + def get_L0B_encodings_dict(sensor_name): """Get a dictionary containing the encoding to write L0B netCDFs.""" d = read_config_yml(sensor_name=sensor_name, filename="L0B_encodings.yml") @@ -161,6 +163,13 @@ def get_L0B_encodings_dict(sensor_name): return d +def get_time_encoding(): + encoding = {} + encoding['units'] = EPOCH + encoding['calendar'] = 'proleptic_gregorian' + return encoding + + def set_DISDRODB_L0_attrs(ds, attrs): sensor_name = attrs['sensor_name'] #---------------------------------- @@ -172,7 +181,7 @@ def set_DISDRODB_L0_attrs(ds, attrs): description_dict = get_description_dict(sensor_name) units_dict = get_units_dict(sensor_name) long_name_dict = get_long_name_dict(sensor_name) - data_format_dict = get_data_format_dict(sensor_name) + # data_format_dict = get_data_format_dict(sensor_name) for var in list(ds.data_vars): attrs_var = {} attrs_var['long_name'] = long_name_dict[var] @@ -201,6 +210,7 @@ def set_DISDRODB_L0_attrs(ds, attrs): return ds + ####-------------------------------------------------------------------------. ############################################# #### Get diameter and velocity bins info #### diff --git a/disdrodb/tests/test_reader_template.py b/disdrodb/tests/test_reader_template.py index 90dad9be..5dce9557 100644 --- a/disdrodb/tests/test_reader_template.py +++ b/disdrodb/tests/test_reader_template.py @@ -6,7 +6,7 @@ @author: ghiggi """ from disdrodb.L0.L0A_processing import read_raw_data -from disdrodb.L0.L0B_processing import retrieve_L1_raw_arrays, create_L0B_from_L0A +from disdrodb.L0.L0B_processing import retrieve_L0B_arrays, create_L0B_from_L0A lazy = False # should we test also True ! @@ -38,7 +38,7 @@ df = df_sanitizer_fun(df, lazy=lazy) print(df) -dict_data = retrieve_L1_raw_arrays(df, sensor_name, lazy=lazy, verbose=False) +dict_data = retrieve_L0B_arrays(df, sensor_name, lazy=lazy, verbose=False) # Note: here the dtype of the 1D variable is object. Expected. -ds = create_L0B_from_L0A(df, attrs, lazy=True, verbose=False) +ds = create_L0B_from_L0A(df, attrs, lazy=lazy, verbose=False)