Add ESGF search link to data table

WCRP-CORDEX · Nov 7, 2024 · 298f8cc · 298f8cc
1 parent f7e8ab1
commit 298f8cc
Show file tree

Hide file tree

Showing 7 changed files with 6,000 additions and 6,322 deletions.
diff --git a/CORDEX_FPSCONV_status.py b/CORDEX_FPSCONV_status.py
@@ -23,29 +23,42 @@
   'model', 'model_version', 'frequency', 'variable', 'version'
 )
 
+def version_range_string(vstring):
+  vint = int(vstring[1:])
+  return('minVersionDate=%d&maxVersionDate=%d' % (vint, vint))
+
 #
 #   Load search results
 #
 conn = SearchConnection('http://esgf-data.dkrz.de/esg-search', distrib=True)
 #conn = SearchConnection('https://esgf-node.ipsl.upmc.fr/esg-search', distrib=True)
 logging.getLogger('pyesgf.search.connection').setLevel(loglevel)
 dflist = []
-for proj in ['cordex-fpsconv','CORDEX-FPSCONV']:
+for proj in ['CORDEX-FPSCONV']:
   logger.info(f'Retrieving {proj} variables ...')
   ctx = conn.new_context(project = proj)
   dids = [result.dataset_id for result in ctx.search(batch_size=1000, ignore_facet_check=True)]
   datanode_part = re.compile('\|.*$')
   dataset_ids = [datanode_part.sub('', did).split('.') for did in dids]
   dflist.append(pd.DataFrame(dataset_ids))
 df = pd.concat(dflist)
-
 df.columns = facets
+
+# Add ESGF search URL
+search_urls = []
+for idx, row in df.iterrows():
+  row_dict = row.to_dict()
+  search_urls.append(
+    'https://esgf-metagrid.cloud.dkrz.de/search?project=CORDEX&'
+#      + version_range_string(row_dict['version']) + '&'
+      + 'activeFacets=%7B%22project%22%3A%22CORDEX-FPSCONV%22%2C%22experiment%22%3A%22{experiment}%22%2C%22driving_model%22%3A%22{driving_model}%22%2C%22institute%22%3A%22{institution}%22%2C%22domain%22%3A%22{domain}%22%2C%22ensemble%22%3A%22{ensemble}%22%2C%22rcm_name%22%3A%22{model}%22%2C%22rcm_version%22%3A%22{model_version}%22%2C%22time_frequency%22%3A%22{frequency}%22%2C%22variable%22%3A%22{variable}%22%7D'.format_map(row_dict)
+  )
+df['search_url'] = search_urls
+
 df.to_csv('docs/CORDEX_FPSCONV_ESGF_all_variables.csv', index = False)
+
 # Drop unnecessary columns
-df.drop(
-  ['project', 'activity', 'version'],
-  axis = 'columns', inplace = True
-)
+df.drop(columns = ['project', 'activity', 'version'], inplace = True)
 df.drop_duplicates(inplace = True)
 df.sort_values(['domain', 'institution', 'model', 'model_version', 'driving_model', 'ensemble', 'experiment'], inplace = True)
 
@@ -68,7 +81,6 @@
   .replace(r'(.*) \(fx\)', r'\1 (fx)   ', regex=True)
   .replace(r'(.*) \(xhr\)', r'\1 (xhr)  ', regex=True)
 ).to_list()
-print(xticklabels)
 ax.set_xticklabels(xticklabels)
 ax.set_xlabel("variable (freq.)")
 ax.set_yticks(0.5+np.arange(len(matrix.index)))
@@ -86,12 +98,14 @@
 csv2datatable(
   'docs/CORDEX_FPSCONV_status.csv',
   'docs/CORDEX_FPSCONV_varlist.html',
+  column_as_link = 'variable',
+  column_as_link_source = 'search_url',
   title = 'CORDEX-FPSCONV on ESGF',
   intro = f'''
 <p> CORDEX-FPSCONV simulations providing some data on ESGF as of <b>{datetime.datetime.now().strftime("%Y-%m-%d %H:%M")}</b>. The full list as CSV can be obtained from <a href="https://github.com/WCRP-CORDEX/simulation-status/raw/main/docs/CORDEX_FPSCONV_ESGF_all_variables.csv">here</a>.
-</p>
+<p> The graphical summary below provides just an overview of the existing data. The variables shown could be available only for a particular experiment (e.g. only for evaluation and not for the scenarios). All subdaily output (1hr, 3hr and 6hr) has been collapsed into a single entry marked as 'xhr'. Use the search box below to find the actual variables and frequencies available for a given experiment. E.g. try to enter "hr rcp ta500". Variable names in the interactive data table below link to the ESGF, where you can download the corresponding data files.
+<p>
 <img src="CORDEX_FPSCONV_varlist.png"/>
-<p> The graphical summary above provides just an overview of the existing data. The variables shown could be available only for a particular experiment (e.g. only for evaluation and not for the scenarios). All subdaily output (1hr, 3hr and 6hr) has been collapsed into a single entry marked as 'xhr'. Use the search box below to find the actual variables and frecuencies available for a given experiment. E.g. try to enter "hr rcp ta500".
 '''
 )
 
@@ -133,7 +147,7 @@
 for domain in domains:
   f.write(f'''<h2 id="{domain}">{domain}<a href="#top">^</a></h2>''')
   dom_df = df[df.domain == domain]
-  dom_df = dom_df.drop(columns=['frequency', 'variable']).drop_duplicates()
+  dom_df = dom_df.drop(columns=['frequency', 'variable', 'search_url']).drop_duplicates()
   if dom_df.empty:
     continue
   dom_df = dom_df.assign(htmlstatus=pd.Series('<span class="' + dom_df.status + '">' + dom_df.experiment + '</span>', index=dom_df.index))