Skip to content

Commit

Permalink
EMDCC-1664: Correct decumulation
Browse files Browse the repository at this point in the history
  • Loading branch information
gnrgomes committed Jan 14, 2025
1 parent 76900d6 commit 9ef4604
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
8 changes: 6 additions & 2 deletions src/lisfloodutilities/gridding/decumulate_daily_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,12 @@ def print_statistics(provider_ids: List[str], df_kiwis_24h: pd.DataFrame, df_kiw
PROVIDER_ID = row['provider_id']
TOTAL_24H_STATIONS = row['count_24h']
DECUMULATED_24H_STATIONS = row['count_6h_after']
DECUMULATED_STATIONS_24H_PERCENT = 100.0 * DECUMULATED_24H_STATIONS / TOTAL_24H_STATIONS
DECUMULATED_STATIONS_RELATIVE_TO_6H_PERCENT = 100.0 * DECUMULATED_24H_STATIONS / TOTAL_6H_STATIONS
DECUMULATED_STATIONS_24H_PERCENT = 0.0
if TOTAL_24H_STATIONS > 0.0:
DECUMULATED_STATIONS_24H_PERCENT = 100.0 * DECUMULATED_24H_STATIONS / TOTAL_24H_STATIONS
DECUMULATED_STATIONS_RELATIVE_TO_6H_PERCENT = 0.0
if TOTAL_6H_STATIONS > 0.0:
DECUMULATED_STATIONS_RELATIVE_TO_6H_PERCENT = 100.0 * DECUMULATED_24H_STATIONS / TOTAL_6H_STATIONS
stats_string = (
f'#APP_STATS: {{"TIMESTAMP": "{TIMESTAMP}", "PROVIDER_ID": {PROVIDER_ID}, '
f'"TOTAL_24H_STATIONS": {TOTAL_24H_STATIONS}, "DECUMULATED_24H_STATIONS": {DECUMULATED_24H_STATIONS}, '
Expand Down
19 changes: 13 additions & 6 deletions src/lisfloodutilities/gridding/lib/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def filter(self, kiwis_files: List[Path], kiwis_timestamps: List[str], kiwis_dat
self.print_stats = True
self.cur_timestamp = dt.strptime(f'{kiwis_timestamps[i]}', "%Y%m%d%H%M")
df_kiwis = self.apply_filter(df_kiwis)
# Drop internal columns to clean the dataframe
df_kiwis = df_kiwis.drop(columns=self.INTERNAL_COLUMNS, axis=1, errors='ignore')
filtered_data_frames.append(df_kiwis)
i += 1
return filtered_data_frames
Expand Down Expand Up @@ -376,13 +378,13 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
if 'EXCLUDE_BELLOW_LATITUDE' in self.args:
self.threshold_max_latitude = np.float32(self.args['EXCLUDE_BELLOW_LATITUDE'])
except Exception as e:
print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default max Latitude {self.threshold_max_latitude}')
self.print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default max Latitude {self.threshold_max_latitude}')
self.threshold_min_value = 0.0
try:
if 'EXCLUDE_BELLOW_VALUE' in self.args:
self.threshold_min_value = np.float32(self.args['EXCLUDE_BELLOW_VALUE'])
except Exception as e:
print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default min RG value {self.threshold_min_value}')
self.print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default min RG value {self.threshold_min_value}')

def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
df = super().apply_filter(df)
Expand Down Expand Up @@ -417,7 +419,7 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
self.num_6h_slots = 0
self.PROVIDER_DWD_SYNOP = '1121'
self.PROVIDER_MARS = '1295'

def get_all_6hourly_station_values_df(self, kiwis_data_frames: List[pd.DataFrame]) -> pd.DataFrame:
merged_df = pd.concat(kiwis_data_frames)
merged_df = merged_df[[self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, self.COL_VALUE]]
Expand Down Expand Up @@ -508,7 +510,8 @@ def filter(self, kiwis_files: List[Path], kiwis_timestamps: List[str], kiwis_dat
providers_ids = list(self.provider_radius.keys())

# Filter all the dataframes
filtered_data_frames = super().filter(kiwis_files[1:], kiwis_timestamps[1:], kiwis_6h_dataframes)
# filtered_data_frames = super().filter(kiwis_files[1:], kiwis_timestamps[1:], kiwis_6h_dataframes)
filtered_data_frames = kiwis_6h_dataframes

# Get all 6hourly slots aggregated
self.all_6h_df = self.get_all_6hourly_station_values_df(filtered_data_frames)
Expand Down Expand Up @@ -562,20 +565,24 @@ def filter(self, kiwis_files: List[Path], kiwis_timestamps: List[str], kiwis_dat

# Insert the decumulated stations in the respective 6h slots
return_data_frames = [self.kiwis_24h_dataframe]
i = 1 # First kiwis contains the daily values and the next 4 kiwis contain the 6hourly values
for df in filtered_data_frames:
df = df.drop(columns=self.INTERNAL_COLUMNS, axis=1, errors='ignore')
# Now we need to eliminate the stations that have neighbors in this 6h slot,
# which means the 6h slot already has a 6h value within the radius and no
# decumulation is needed in this slot.
df_decumulated_24h = self.df_24h_with_neighbors
df_decumulated_24h = self.df_24h_with_neighbors.copy(deep=True)
df_decumulated_24h['has_neighbor_within_radius'] = False
tree = cKDTree(df[[self.COL_LON, self.COL_LAT]])
df_decumulated_24h = self.update_column_if_provider_stations_are_in_radius(df=df_decumulated_24h, tree=tree)
df_decumulated_24h = df_decumulated_24h[df_decumulated_24h['has_neighbor_within_radius'] == False]
df_decumulated_24h = df_decumulated_24h.drop(columns=self.INTERNAL_COLUMNS, axis=1, errors='ignore')
# Append both decumulated 24h dataframes to the 6h slot
df_filtered = pd.concat([df, self.df_24h_without_neighbors, df_decumulated_24h])
df_filtered = pd.concat([df, self.df_24h_without_neighbors, df_decumulated_24h], ignore_index=True)
return_data_frames.append(df_filtered)
# Timestamp is used to print the 6hourly statistics
self.cur_timestamp = dt.strptime(f'{kiwis_timestamps[i]}', "%Y%m%d%H%M")
i += 1
self.print_statistics(df_filtered)
return return_data_frames

0 comments on commit 9ef4604

Please sign in to comment.