Add NAM domain table and some fixes

Fixes necessary due to pandas updates. See #25.
WCRP-CORDEX · Feb 18, 2025 · 1eee55b · 1eee55b
1 parent 50981d7
commit 1eee55b
Show file tree

Hide file tree

Showing 8 changed files with 12,967 additions and 16 deletions.
diff --git a/CMIP6_studies_config.yaml b/CMIP6_studies_config.yaml
@@ -39,6 +39,11 @@ spatial_scope_filter:
     - BLK # Black Sea
     - special
 
+  NAM:
+    - Global
+    - NH
+    - special
+
   SEA:
     - Global
     - Tropics

diff --git a/CMIP6_studies_table.py b/CMIP6_studies_table.py
@@ -9,6 +9,7 @@
 from icecream import ic
 
 CORDEX_DOMAIN = sys.argv[1]
+rcm_source_types = ['ARCM', 'AORCM']
 
 def synthesis_sum_failures(binvalues, test = False):
   if test:
@@ -90,18 +91,18 @@ def get_from_config(config, label, domain):
 mandatory_scenarios = ['historical','ssp126', 'ssp370']
 lbc_for_source_type = get_from_config(config, 'lbc_for_source_type', CORDEX_DOMAIN)
 tableavail.loc[:,'synthesis'] = np.logical_and.reduce(
-  tableavail.loc[:,mandatory_scenarios] == lbc_for_source_type, axis=1
+  tableavail.loc[:,mandatory_scenarios].isin(rcm_source_types), axis=1
 ) * 1
 tableavail.loc[:,'synthesis'] = tableavail.loc[:,'synthesis'].astype(int)
 # - filter out entries with less than 2 scenarios unless a metric is available for it
 availscenarios = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
-tableavail_row_filter = np.sum(tableavail.loc[:,availscenarios] == lbc_for_source_type, axis=1) >= 2
+tableavail_row_filter = np.sum(tableavail.loc[:,availscenarios].isin(rcm_source_types), axis=1) >= 2
 row_filter = set(tableavail.index[tableavail_row_filter]).union(
   tableprange.index,
   tablespread.index,
   tableother.index
 ).intersection(tableavail.index)
-tableavail = tableavail.loc[row_filter]
+tableavail = tableavail.loc[list(row_filter)]
 
 # All together
 main_headers = ['1. Availability', '2. Plausibility', '3. Spread of future outcomes', '4. Other criteria']
@@ -138,7 +139,7 @@ def greyout_non_rcm(df):
 # Bug in pandas https://github.com/pandas-dev/pandas/issues/35429
 #  return(df.where(df == 'RCM', attr))
   rval = df.copy()
-  rval.iloc[:] = np.where((rval == lbc_for_source_type).fillna(False), rval, attr)
+  rval.iloc[:] = np.where(rval.isin(rcm_source_types).fillna(False), '', attr)
   return(rval)
 
 def greyout_unplausible(df):
@@ -224,11 +225,15 @@ def single_member(filt):
     filters[filtname] = single_member(filters[filtname])
 else:
   single_member_in_title = ''
-pd.set_option('precision', 2)
-format_dict = { # Format exceptions
+
+format_dict = {col: "{:.2f}" for col in tablefull.select_dtypes(include=['float64']).columns}
+format_dict.update({ # Format exceptions
+                                        ('1. Availability', 'synthesis'): '{:.0f}',
                          ('2. Plausibility', 'Nabat EUR AOD hist trend'): '{:.3f}',
+                                       ('2. Plausibility', 'Bru20 perf'): '{:.3f}',
   ('3. Spread of future outcomes', 'Nabat EUR AOD future change ssp585'): '{:.3f}'
-}
+})
+
 d1 = dict(selector=".level0", props=[('min-width', '150px')])
 f = open(f'docs/CMIP6_studies_table_{CORDEX_DOMAIN}.html','w')
 f.write(f'''<!DOCTYPE html>
@@ -301,6 +306,7 @@ def single_member(filt):
 domain_filters = get_from_config(config, 'tables_filter', CORDEX_DOMAIN)
 f.write('\n'.join([f'\n<li><a href="#{filter_metadata[filtname]["id"]}">{filter_metadata[filtname]["header"]}</a></li>' for filtname in domain_filters]))
 f.write('</ul>')
+
 for filtname in domain_filters:
   if ~filters[filtname].any(): # Skip empty tables
     continue
@@ -313,7 +319,7 @@ def single_member(filt):
     .loc[filters[filtname]]
     .convert_dtypes(convert_string = False, convert_boolean = False)
     .style
-      .format(format_dict)
+      .format(format_dict, na_rep='')
       .set_properties(**{'font-size':'8pt', 'border':'1px lightgrey solid !important'})
       .set_table_styles([d1,{
         'selector': 'th',
@@ -324,7 +330,7 @@ def single_member(filt):
       .apply(greyout_unplausible_rows, axis=0, subset=spreadcols+othercols)
       .apply(highligh_plausible_range, axis=0)
       .apply(color_classes, axis=0, subset=spreadcols)
-      .render()
+      .to_html()
       .replace('nan','')
   )
 f.write('</body></html>')

diff --git a/Makefile b/Makefile
@@ -10,7 +10,7 @@ update-esgf:
 	python3 CMIP6_for_CORDEX.py
 	python3 CMIP6_for_CORDEX_availability_RCM.py
 
-update-tables: AUS EUR MED SEA
+update-tables: AUS EUR MED NAM SEA
 
 AUS: CMIP6_downscaling_plans.csv
 	python3 CMIP6_studies_table.py AUS
@@ -23,6 +23,10 @@ MED: CMIP6_downscaling_plans.csv
 	python3 CMIP6_studies_table.py MED
 	python3 util/row_tooltips.py MED
 	python3 CMIP6_studies_list.py MED > docs/CMIP6_studies_list_MED.md
+NAM: CMIP6_downscaling_plans.csv
+	python3 CMIP6_studies_table.py NAM
+	python3 util/row_tooltips.py NAM
+	python3 CMIP6_studies_list.py NAM > docs/CMIP6_studies_list_NAM.md
 SEA: CMIP6_downscaling_plans.csv
 	python3 CMIP6_studies_table.py SEA
 	python3 CMIP6_studies_list.py SEA > docs/CMIP6_studies_list_SEA.md
diff --git a/YamlStudies.py b/YamlStudies.py
@@ -122,28 +122,28 @@ def expand_data(self):
             self.data.loc[model + '_r%(r)si%(i)sp%(p)sf%(f)s' % thisripf] = self.data.loc[key]
             modelmeanflag[model + '_r%(r)si%(i)sp%(p)sf%(f)s' % thisripf] = 1
           self.data.drop(index = key, inplace = True)
-    self.is_ens_mean = self.data.iloc[:,0].copy()
+    self.is_ens_mean = self.data.iloc[:,0].copy().astype(bool)
     self.is_ens_mean.iloc[:] = False
     self.is_ens_mean = self.is_ens_mean | (pd.DataFrame.from_dict(modelmeanflag, orient='index', columns=['is_ens_mean']) != 1)
 
   def get_class_data(self):
     if self.has_classes():
-      rval = self.data.copy()
+      rval = self.data.copy().astype("object")
       rval.iloc[:] = pd.cut(self.data.values.flat,
         self.classes[0]['limits'],
         labels=self.classes[0]['labels'],
         ordered = True # TODO: could be made False if 'colors' are passed (e.g. to have ['unplausible', 'medium','unplausible'])
-      )
+      ).to_numpy().reshape(rval.shape)#.astype("category")
     elif self.metric.units == 'categorical':
       rval = self.data.copy()
     else:
-      rval = self.data.copy()
+      rval = self.data.copy().astype("object")
       try:
         for icol in range(rval.shape[1]):
           rval.iloc[:,icol] = pd.qcut(rval.iloc[:,icol].values.flat,
             q=3, # terciles
             labels=[f'T{x}' for x in range(1,3+1)]
-          )
+          ).to_numpy().reshape(-1,1)#.astype("category")
       except ValueError:
         rval = self.data.copy()
     return(rval)
-Original file line number
+Diff line change
@@ Expand Up / @@ -39,6 +39,11 @@ spatial_scope_filter: @@
         - BLK # Black Sea
         - special
+      NAM:
+        - Global
+        - NH
+        - special
       SEA:
         - Global
         - Tropics
@@ Expand Down @@