Skip to content

Commit

Permalink
Add ESGF search link to data table
Browse files Browse the repository at this point in the history
  • Loading branch information
jesusff committed Nov 7, 2024
1 parent f7e8ab1 commit 298f8cc
Show file tree
Hide file tree
Showing 7 changed files with 6,000 additions and 6,322 deletions.
34 changes: 24 additions & 10 deletions CORDEX_FPSCONV_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,42 @@
'model', 'model_version', 'frequency', 'variable', 'version'
)

def version_range_string(vstring):
    """Return a version-date query fragment pinned to a single version.

    Parameters
    ----------
    vstring : str
        Dataset version label such as ``'v20241107'``. The one-character
        prefix is optional: ``'20241107'`` is accepted as well.

    Returns
    -------
    str
        ``'minVersionDate=<n>&maxVersionDate=<n>'`` where both bounds are the
        integer value of the version label.

    Raises
    ------
    ValueError
        If what remains after the optional prefix is not an integer.
    """
    # Only drop the first character when it is a prefix, not a digit;
    # otherwise an un-prefixed label would silently lose its leading digit.
    digits = vstring if vstring[:1].isdigit() else vstring[1:]
    vint = int(digits)
    return 'minVersionDate=%d&maxVersionDate=%d' % (vint, vint)

#
# Load search results
#
conn = SearchConnection('http://esgf-data.dkrz.de/esg-search', distrib=True)
#conn = SearchConnection('https://esgf-node.ipsl.upmc.fr/esg-search', distrib=True)
logging.getLogger('pyesgf.search.connection').setLevel(loglevel)
dflist = []
for proj in ['cordex-fpsconv','CORDEX-FPSCONV']:
for proj in ['CORDEX-FPSCONV']:
logger.info(f'Retrieving {proj} variables ...')
ctx = conn.new_context(project = proj)
dids = [result.dataset_id for result in ctx.search(batch_size=1000, ignore_facet_check=True)]
# Matches the '|<data node>' suffix of a dataset id so it can be stripped.
# Raw string: '\|' is not a valid escape sequence in a normal str literal.
datanode_part = re.compile(r'\|.*$')
dataset_ids = [datanode_part.sub('', did).split('.') for did in dids]
dflist.append(pd.DataFrame(dataset_ids))
df = pd.concat(dflist)

df.columns = facets

# Add ESGF search URL
def _esgf_search_url(row):
    """Return the ESGF MetaGrid search URL selecting the dataset in *row*.

    *row* maps the ``facets`` column names to the values of one dataset.
    The ``activeFacets`` query parameter is a percent-encoded JSON object;
    it is serialized and encoded here rather than pasted into a pre-encoded
    template, so values containing URL or JSON metacharacters cannot break
    the link.
    """
    # Function-scope imports keep this block self-contained.
    import json
    from urllib.parse import quote

    # Keys are the search facet names; values come from the dataset-id
    # columns (note the differing names, e.g. institute <- institution).
    active_facets = {
        'project': 'CORDEX-FPSCONV',
        'experiment': row['experiment'],
        'driving_model': row['driving_model'],
        'institute': row['institution'],
        'domain': row['domain'],
        'ensemble': row['ensemble'],
        'rcm_name': row['model'],
        'rcm_version': row['model_version'],
        'time_frequency': row['frequency'],
        'variable': row['variable'],
    }
    # Compact separators and safe='' give the same encoding the hand-written
    # template used (%7B %22 %3A %2C %7D).
    encoded = quote(
        json.dumps(
            {key: str(value) for key, value in active_facets.items()},
            separators=(',', ':'),
        ),
        safe='',
    )
    # NOTE: restricting the search to the dataset version (via
    # version_range_string(row['version'])) is currently disabled.
    return (
        'https://esgf-metagrid.cloud.dkrz.de/search?project=CORDEX&'
        'activeFacets=' + encoded
    )


search_urls = [_esgf_search_url(row) for row in df.to_dict('records')]
df['search_url'] = search_urls

df.to_csv('docs/CORDEX_FPSCONV_ESGF_all_variables.csv', index = False)

# Drop unnecessary columns
df.drop(
['project', 'activity', 'version'],
axis = 'columns', inplace = True
)
df.drop(columns = ['project', 'activity', 'version'], inplace = True)
df.drop_duplicates(inplace = True)
df.sort_values(['domain', 'institution', 'model', 'model_version', 'driving_model', 'ensemble', 'experiment'], inplace = True)

Expand All @@ -68,7 +81,6 @@
.replace(r'(.*) \(fx\)', r'\1 (fx) ', regex=True)
.replace(r'(.*) \(xhr\)', r'\1 (xhr) ', regex=True)
).to_list()
print(xticklabels)
ax.set_xticklabels(xticklabels)
ax.set_xlabel("variable (freq.)")
ax.set_yticks(0.5+np.arange(len(matrix.index)))
Expand All @@ -86,12 +98,14 @@
csv2datatable(
'docs/CORDEX_FPSCONV_status.csv',
'docs/CORDEX_FPSCONV_varlist.html',
column_as_link = 'variable',
column_as_link_source = 'search_url',
title = 'CORDEX-FPSCONV on ESGF',
intro = f'''
<p> CORDEX-FPSCONV simulations providing some data on ESGF as of <b>{datetime.datetime.now().strftime("%Y-%m-%d %H:%M")}</b>. The full list as CSV can be obtained from <a href="https://github.com/WCRP-CORDEX/simulation-status/raw/main/docs/CORDEX_FPSCONV_ESGF_all_variables.csv">here</a>.
</p>
<p> The graphical summary below provides just an overview of the existing data. The variables shown could be available only for a particular experiment (e.g. only for evaluation and not for the scenarios). All subdaily output (1hr, 3hr and 6hr) has been collapsed into a single entry marked as 'xhr'. Use the search box below to find the actual variables and frequencies available for a given experiment. E.g. try to enter "hr rcp ta500". Variable names in the interactive data table below link to the ESGF, where you can download the corresponding data files.
<p>
<img src="CORDEX_FPSCONV_varlist.png"/>
<p> The graphical summary above provides just an overview of the existing data. The variables shown could be available only for a particular experiment (e.g. only for evaluation and not for the scenarios). All subdaily output (1hr, 3hr and 6hr) has been collapsed into a single entry marked as 'xhr'. Use the search box below to find the actual variables and frequencies available for a given experiment. E.g. try to enter "hr rcp ta500".
'''
)

Expand Down Expand Up @@ -133,7 +147,7 @@
for domain in domains:
f.write(f'''<h2 id="{domain}">{domain}<a href="#top">^</a></h2>''')
dom_df = df[df.domain == domain]
dom_df = dom_df.drop(columns=['frequency', 'variable']).drop_duplicates()
dom_df = dom_df.drop(columns=['frequency', 'variable', 'search_url']).drop_duplicates()
if dom_df.empty:
continue
dom_df = dom_df.assign(htmlstatus=pd.Series('<span class="' + dom_df.status + '">' + dom_df.experiment + '</span>', index=dom_df.index))
Expand Down
Loading

0 comments on commit 298f8cc

Please sign in to comment.