Skip to content

Commit

Permalink
Merge pull request #68 from ghiggi/bugfix-L0B-encodings
Browse files Browse the repository at this point in the history
Fix time and object netCDF encoding problems  #10, #45, #48
  • Loading branch information
regislon authored Sep 16, 2022
2 parents ed9a8a1 + 4991fdc commit 985f082
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 68 deletions.
52 changes: 39 additions & 13 deletions disdrodb/L0/L0B_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
get_velocity_bin_width,
get_raw_field_nbins,
get_L0B_encodings_dict,
get_time_encoding,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -137,9 +138,9 @@ def reshape_raw_spectrum_to_2D(arr, n_bins_dict, n_timesteps):
return arr


def retrieve_L1_raw_arrays(df, sensor_name, lazy=True, verbose=False):
def retrieve_L0B_arrays(df, sensor_name, lazy=True, verbose=False):
# Log
msg = " - Retrieval of L1 data matrix started."
msg = " - Retrieval of L0B data matrix started."
if verbose:
print(msg)
logger.info(msg)
Expand Down Expand Up @@ -246,6 +247,13 @@ def get_coords(sensor_name):
return coords


def convert_object_variables_to_string(ds):
    """Cast every object-dtype data variable of ``ds`` to string.

    Parameters
    ----------
    ds : dataset-like
        Object exposing ``data_vars`` and item get/set access
        (presumably an xarray Dataset -- confirm against the callers).

    Returns
    -------
    The same ``ds`` object, with object-dtype variables replaced by their
    ``astype(str)`` version. Variables of any other dtype are left untouched.
    """
    # Snapshot the variable names first: variables are reassigned inside the
    # loop, so we must not iterate over a live view of the container.
    for var in list(ds.data_vars):
        if pd.api.types.is_object_dtype(ds[var]):
            ds[var] = ds[var].astype(str)
    return ds


def create_L0B_from_L0A(df, attrs, lazy=True, verbose=False):
# Retrieve sensor name
sensor_name = attrs["sensor_name"]
Expand All @@ -262,7 +270,7 @@ def create_L0B_from_L0A(df, attrs, lazy=True, verbose=False):
df, sensor_name=sensor_name, lazy=lazy, verbose=verbose
)
# Retrieve raw data matrices
dict_data = retrieve_L1_raw_arrays(df, sensor_name, lazy=lazy, verbose=verbose)
dict_data = retrieve_L0B_arrays(df, sensor_name, lazy=lazy, verbose=verbose)
# Define raw data matrix variables for xarray Dataset
data_vars = {
"raw_drop_concentration": (
Expand Down Expand Up @@ -347,7 +355,10 @@ def create_L0B_from_L0A(df, attrs, lazy=True, verbose=False):
msg = f"Error in the creation of L1 xarray Dataset. The error is: \n {e}"
logger.error(msg)
raise ValueError(msg)


# Ensure variables with dtype object are converted to string
ds = convert_object_variables_to_string(ds)

# -----------------------------------------------------------
# Check L0B standards
check_L0B_standards(ds)
Expand Down Expand Up @@ -376,28 +387,43 @@ def sanitize_encodings_dict(encoding_dict, ds):

def rechunk_dataset(ds, encoding_dict):
    """Rechunk the data variables of ``ds`` as requested in ``encoding_dict``.

    For each data variable, the "chunksizes" entry is removed (popped) from
    ``encoding_dict[var]``, so ``encoding_dict`` is modified in place.
    When the popped value is None the variable is left as it is; otherwise
    the variable is replaced by its ``.chunk(chunksizes)`` version.

    Returns the (updated) ``ds``.
    """
    for name in ds.data_vars:
        chunk_sizes = encoding_dict[name].pop("chunksizes")
        if chunk_sizes is None:
            # Nothing to rechunk for this variable
            continue
        ds[name] = ds[name].chunk(chunk_sizes)
    return ds


def write_L0B(ds, fpath, sensor_name):
# Ensure directory exist
os.makedirs(os.path.dirname(fpath), exist_ok=True)

def set_encodings(ds, sensor_name):
    """Attach the L0B encodings to the variables of ``ds`` and return it.

    The encodings of ``sensor_name`` are restricted to the data variables
    present in ``ds``, sanitized, used to rechunk the dataset and finally
    stored in the ``.encoding`` of each data variable. The ``time``
    variable receives the encoding returned by ``get_time_encoding()``.
    """
    # Retrieve the sensor encodings and keep only the variables found in ds
    sensor_encodings = get_L0B_encodings_dict(sensor_name)
    var_encodings = {name: sensor_encodings[name] for name in ds.data_vars}

    # Ensure chunksize smaller than the array shape
    var_encodings = sanitize_encodings_dict(var_encodings, ds)

    # Rechunk variables for fast writing.
    # - This pops the "chunksizes" entry from each variable encoding.
    ds = rechunk_dataset(ds, var_encodings)

    # Time encoding
    time_encoding = get_time_encoding()
    ds["time"].encoding.update(time_encoding)

    # Data variable encodings
    for name in ds.data_vars:
        ds[name].encoding.update(var_encodings[name])

    return ds


def write_L0B(ds, fpath, sensor_name):
    """Write the L0B dataset ``ds`` to the netCDF file ``fpath``.

    Parameters
    ----------
    ds : dataset-like
        Object to be written (must provide ``to_netcdf``).
    fpath : str
        Path of the output netCDF file. Missing parent directories are created.
    sensor_name : str
        Sensor name forwarded to ``set_encodings``.
    """
    # Ensure the output directory exists.
    # os.path.dirname returns "" for a bare filename, and os.makedirs("")
    # raises, so only create the directory when there is one.
    directory = os.path.dirname(fpath)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Set encodings (stored in the .encoding of each variable)
    ds = set_encodings(ds, sensor_name)

    # Write netcdf.
    # No 'encoding' argument is passed here: no encoding dictionary is built
    # in this function, the encodings are attached by set_encodings.
    ds.to_netcdf(fpath, engine="netcdf4")


####--------------------------------------------------------------------------.
Expand Down
26 changes: 13 additions & 13 deletions disdrodb/L0/configs/OTT_Parsivel/L0A_encodings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,31 @@ rainfall_rate_32bit: 'float32'
rainfall_accumulated_32bit: 'float32'
weather_code_synop_4680: 'uint32'
weather_code_synop_4677: 'uint32'
weather_code_metar_4678: 'object'
weather_code_nws: 'object'
weather_code_metar_4678: 'str'
weather_code_nws: 'str'
reflectivity_32bit: 'float32'
mor_visibility: 'uint16'
sample_interval: 'uint16'
laser_amplitude: 'uint32'
number_particles: 'uint32'
sensor_temperature: 'int8'
sensor_serial_number: 'object'
firmware_iop: 'object'
firmware_dsp: 'object'
sensor_serial_number: 'str'
firmware_iop: 'str'
firmware_dsp: 'str'
sensor_heating_current: 'float32'
sensor_battery_voltage: 'float32'
sensor_status: 'uint8'
start_time: 'object'
sensor_time: 'object'
sensor_date: 'object'
station_name: 'object'
station_number: 'object'
start_time: 'str'
sensor_time: 'str'
sensor_date: 'str'
station_name: 'str'
station_number: 'str'
rainfall_amount_absolute_32bit: 'float32'
error_code: 'uint8'
rainfall_rate_16bit: 'float32'
rainfall_rate_12bit: 'float32'
rainfall_accumulated_16bit: 'float32'
reflectivity_16bit: 'float32'
raw_drop_concentration: 'object'
raw_drop_average_velocity: 'object'
raw_drop_number: 'object'
raw_drop_concentration: 'str'
raw_drop_average_velocity: 'str'
raw_drop_number: 'str'
28 changes: 14 additions & 14 deletions disdrodb/L0/configs/OTT_Parsivel2/L0A_encodings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,25 @@ rainfall_rate_32bit: 'float32'
rainfall_accumulated_32bit: 'float32'
weather_code_synop_4680: 'uint32'
weather_code_synop_4677: 'uint32'
weather_code_metar_4678: 'object'
weather_code_nws: 'object'
weather_code_metar_4678: 'str'
weather_code_nws: 'str'
reflectivity_32bit: 'float32'
mor_visibility: 'uint16'
sample_interval: 'uint16'
laser_amplitude: 'uint32'
number_particles: 'uint32'
sensor_temperature: 'int8'
sensor_serial_number: 'object'
firmware_iop: 'object'
firmware_dsp: 'object'
sensor_serial_number: 'str'
firmware_iop: 'str'
firmware_dsp: 'str'
sensor_heating_current: 'float32'
sensor_battery_voltage: 'float32'
sensor_status: 'uint8'
start_time: 'object'
sensor_time: 'object'
sensor_date: 'object'
station_name: 'object'
station_number: 'object'
start_time: 'str'
sensor_time: 'str'
sensor_date: 'str'
station_name: 'str'
station_number: 'str'
rainfall_amount_absolute_32bit: 'float32'
error_code: 'uint8'
sensor_temperature_pcb: 'int8'
Expand All @@ -33,7 +33,7 @@ reflectivity_16bit: 'float32'
rain_kinetic_energy: 'float32'
snowfall_rate: 'float32'
number_particles_all: 'uint32'
list_particles: 'object'
raw_drop_concentration: 'object'
raw_drop_average_velocity: 'object'
raw_drop_number: 'object'
list_particles: 'str'
raw_drop_concentration: 'str'
raw_drop_average_velocity: 'str'
raw_drop_number: 'str'
24 changes: 12 additions & 12 deletions disdrodb/L0/configs/Thies_LPM/L0A_encodings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ start_identifier: 'uint8'
device_address: 'uint8'
sensor_serial_number: 'uint16'
software_version: 'float32'
date_sensor: 'object'
time_sensor: 'object'
date_sensor: 'str'
time_sensor: 'str'
weather_code_synop_4677_5min: 'uint8'
weather_code_synop_4680_5min : 'uint8'
weather_code_metar_4678_5min : 'object'
weather_code_metar_4678_5min : 'str'
precipitation_rate_5min : 'float32'
weather_code_synop_4677: 'uint8'
weather_code_synop_4680: 'uint8'
weather_code_metar_4678: 'object'
weather_code_metar_4678: 'str'
precipitation_rate: 'float32'
rainfall_rate: 'float32'
snowfall_rate: 'float32'
Expand Down Expand Up @@ -40,14 +40,14 @@ laser_temperature: 'uint16'
laser_current_average: 'uint16'
control_voltage: 'float32'
optical_control_voltage_output: 'float32'
sensor_voltage_supply : 'object'
current_heating_pane_transmitter_head : 'object'
current_heating_pane_receiver_head: 'object'
sensor_voltage_supply : 'str'
current_heating_pane_transmitter_head : 'str'
current_heating_pane_receiver_head: 'str'
temperature_ambient: 'float32'
current_heating_voltage_supply: 'object'
current_heating_house: 'object'
current_heating_heads: 'object'
current_heating_carriers: 'object'
current_heating_voltage_supply: 'str'
current_heating_house: 'str'
current_heating_heads: 'str'
current_heating_carriers: 'str'
number_particles: 'uint16'
number_particles_internal_data: 'float32'
number_particles_min_speed: 'uint16'
Expand Down Expand Up @@ -78,7 +78,7 @@ number_particles_class_8: 'uint16'
number_particles_class_8_internal_data: 'float32'
number_particles_class_9: 'uint16'
number_particles_class_9_internal_data: 'float32'
raw_drop_number: 'object'
raw_drop_number: 'str'
# '521': air_temperature
# '522': relative_humidity
# '523': wind_speed
Expand Down
2 changes: 1 addition & 1 deletion disdrodb/L0/readers/NCAR/reader_VORTEX2_2010.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def df_sanitizer_fun(df, lazy=False):
df["time"] = dd.to_datetime(
df["sensor_date"] + "-" + df["sensor_time"], format="%d.%m.%Y-%H:%M:%S"
)
df = df.drop(columns=["sensor_date", "sensor_time"])
df = df.drop(columns=["sensor_date", "sensor_time", "start_time"])

# Reformat weather codes
if "weather_code_metar_4678" in df.columns:
Expand Down
34 changes: 22 additions & 12 deletions disdrodb/L0/standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,13 @@

PRODUCT_VERSION = "V0"
SOFTWARE_VERSION = "V0"
EPOCH = u"seconds since 1970-01-01 00:00:00"

# TODO:
# - get_L0_dtype,
# - get_L1_netcdfencoding_dict --> get_encodings(sensor_name)
# - _get_encodings_key
# - get_encodings_dtype
# - get_encodings_chunk
# - get_encodings_ ....

# Notes:
# - L0A_encodings currently specify only the dtype. This could be expanded in the future.
# - disdrodb.configs ... the netcdf chunk size could be an option to be specified


def read_config_yml(sensor_name, filename):
"""Read a config yaml file and return the dictionary."""
Expand Down Expand Up @@ -94,7 +93,8 @@ def get_data_format_dict(sensor_name):
def get_long_name_dict(sensor_name):
    """Return the long name of each sensor variable as a dictionary.

    The content is read from the "variable_longname.yml" config file
    of ``sensor_name``.
    """
    long_name_dict = read_config_yml(
        sensor_name=sensor_name,
        filename="variable_longname.yml",
    )
    return long_name_dict



def get_units_dict(sensor_name):
    """Return the unit of each sensor variable as a dictionary.

    The content is read from the "variable_units.yml" config file
    of ``sensor_name``.
    """
    units_dict = read_config_yml(
        sensor_name=sensor_name,
        filename="variable_units.yml",
    )
    return units_dict
Expand All @@ -120,17 +120,19 @@ def get_velocity_bins_dict(sensor_name):


def get_L0A_dtype(sensor_name):
    """Get a dictionary containing the L0A dtype.

    The dictionary is read from the "L0A_encodings.yml" config file
    of ``sensor_name``.
    """
    # Note: This function could extract the info from get_L0A_encodings_dict in future.
    return read_config_yml(sensor_name=sensor_name, filename="L0A_encodings.yml")


def get_L0A_encodings_dict(sensor_name):
    """Get a dictionary containing the L0A encodings.

    The dictionary is read from the "L0A_encodings.yml" config file
    of ``sensor_name``.
    """
    # - L0A_encodings currently specify only the dtype. This could be expanded in the future.
    return read_config_yml(sensor_name=sensor_name, filename="L0A_encodings.yml")


def get_L0B_encodings_dict(sensor_name):
"""Get a dictionary containing the encoding to write L0B netCDFs."""
d = read_config_yml(sensor_name=sensor_name, filename="L0B_encodings.yml")
Expand Down Expand Up @@ -161,6 +163,13 @@ def get_L0B_encodings_dict(sensor_name):
return d


def get_time_encoding():
    """Return the encoding dictionary applied to the time variable.

    The dictionary holds the "units" (the module-level ``EPOCH`` string)
    and the "calendar" ("proleptic_gregorian").
    """
    return {
        "units": EPOCH,
        "calendar": "proleptic_gregorian",
    }


def set_DISDRODB_L0_attrs(ds, attrs):
sensor_name = attrs['sensor_name']
#----------------------------------
Expand All @@ -172,7 +181,7 @@ def set_DISDRODB_L0_attrs(ds, attrs):
description_dict = get_description_dict(sensor_name)
units_dict = get_units_dict(sensor_name)
long_name_dict = get_long_name_dict(sensor_name)
data_format_dict = get_data_format_dict(sensor_name)
# data_format_dict = get_data_format_dict(sensor_name)
for var in list(ds.data_vars):
attrs_var = {}
attrs_var['long_name'] = long_name_dict[var]
Expand Down Expand Up @@ -201,6 +210,7 @@ def set_DISDRODB_L0_attrs(ds, attrs):

return ds


####-------------------------------------------------------------------------.
#############################################
#### Get diameter and velocity bins info ####
Expand Down
6 changes: 3 additions & 3 deletions disdrodb/tests/test_reader_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@author: ghiggi
"""
from disdrodb.L0.L0A_processing import read_raw_data
from disdrodb.L0.L0B_processing import retrieve_L1_raw_arrays, create_L0B_from_L0A
from disdrodb.L0.L0B_processing import retrieve_L0B_arrays, create_L0B_from_L0A

lazy = False # should we test also True !

Expand Down Expand Up @@ -38,7 +38,7 @@
df = df_sanitizer_fun(df, lazy=lazy)
print(df)

dict_data = retrieve_L1_raw_arrays(df, sensor_name, lazy=lazy, verbose=False)
dict_data = retrieve_L0B_arrays(df, sensor_name, lazy=lazy, verbose=False)

# Note: here the dtype of the 1D variable is object. Expected.
ds = create_L0B_from_L0A(df, attrs, lazy=True, verbose=False)
ds = create_L0B_from_L0A(df, attrs, lazy=lazy, verbose=False)

0 comments on commit 985f082

Please sign in to comment.