Skip to content

Commit

Permalink
Update code to run 3D base stats, 3D var on megamem cluster #17
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas-Moore-Creative committed May 3, 2024
1 parent 81bf330 commit ac3ca75
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 1 deletion.
65 changes: 65 additions & 0 deletions src/BRAN2020_temp_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# ///////////////////////
# BRAN2020_temp_stats.py
# 3 May 2024
#////////////////////////
# --------- packages --------------
import intake
import xarray as xr
import pandas as pd
import numpy as np
from dask.distributed import Client, LocalCluster
import dask
import datetime
import os
import configparser
import sys

def main():
    """Compute monthly-climatology statistics for BRAN2020 ``temp`` and save them.

    Steps, in order:

    1. Put the project's ``src`` directory on ``sys.path`` and import
       ``bran2020_stats``.
    2. Start a local dask cluster with 48 single-threaded worker processes.
    3. Read ``catalog_path`` from the ``[paths]`` section of ``config.ini`` and
       open the ``BRAN2020.json`` intake-esm datastore found there.
    4. Search the catalog for the daily ``temp`` variable and open it as a
       dask-backed dataset.
    5. Call ``bran2020_stats.stats_monthclim`` on it, print the result size
       (in GB) and chunking, and write the result to a NetCDF file.

    The cluster and client are used as context managers so that the worker
    processes are shut down when the work finishes *or* when any step raises;
    previously they were never closed explicitly.

    Takes no arguments and returns ``None``; all inputs are hard-coded paths.
    """
    print("importing functions ...")
    # bran2020_stats is not an installed package, so its directory must be
    # added to sys.path before it can be imported.
    sys.path.append('/g/data/es60/users/thomas_moore/code/Climatology-generator-demo/src/')
    import bran2020_stats

    print("Spinning up a dask cluster...")
    # ----------- cluster -----------------------
    # Client / LocalCluster come from the module-level dask.distributed import.
    with LocalCluster(n_workers=48, threads_per_worker=1, processes=True) as cluster:
        with Client(cluster) as client:
            print(client)

            # ----------- catalog -----------------------
            config = configparser.ConfigParser()
            config.read('/g/data/es60/users/thomas_moore/code/BRAN2020-intake-catalog/config.ini')
            catalog_path = config.get('paths', 'catalog_path')
            # NOTE(review): plain concatenation assumes catalog_path ends with
            # a path separator - confirm against config.ini.
            BRAN2020_catalog = intake.open_esm_datastore(
                catalog_path + 'BRAN2020.json',
                columns_with_iterables=['variable'],
            )

            # ----------- data selection ----------------
            var_request_list = ['temp']
            var = var_request_list[0]
            time_period_request_list = ['daily']
            search = BRAN2020_catalog.search(
                variable=var,
                time_period=time_period_request_list,
            )
            # -1 asks for a single chunk along Time; st_ocean is split into
            # chunks of 10.
            xarray_open_kwargs = {"chunks": {"Time": -1, 'st_ocean': 10}}
            DS = search.to_dask(xarray_open_kwargs=xarray_open_kwargs)

            # ----------- statistics --------------------
            stats_monthclim_ds = bran2020_stats.stats_monthclim(
                DS,
                var_name=var,
                time_dim='Time',
                skipna_flag=False,
                method_str='cohorts',
            )
            print(stats_monthclim_ds.nbytes/1e9)
            bran2020_stats.print_chunks(stats_monthclim_ds[var])

            # ----------- output ------------------------
            results_path = '/g/data/es60/users/thomas_moore/clim_demo_results/daily/draft_delivery/'
            results_file = f'BRAN2020_stats_monthclim_{var}.nc'

            print(f"writing to the netcdf file for : {var} ....")

            # The write must stay inside the ``with`` blocks: the dataset is
            # dask-backed, so the cluster has to be alive while it is written.
            stats_monthclim_ds.to_netcdf(
                os.path.join(results_path, results_file),
                engine='netcdf4',
            )

            print(f"netcdf written: {var}")
            print(f"done with basic stats calc and write to netcdf for: {var}")


# Run the workflow only when this file is executed as a script. With
# processes=True the guard also keeps worker processes that re-import this
# module from starting clusters of their own (see the dask.distributed docs).
if __name__ == "__main__":
    main()
10 changes: 9 additions & 1 deletion src/bran2020_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,15 @@


# Function definitions
def mean_monthclim_flox(ds,var_name,time_dim='time',method_str='cohorts',skipna_flag=False):
def print_chunks(data_array):
    """Print and return the chunk sizes of ``data_array`` keyed by dimension.

    Parameters
    ----------
    data_array
        Any object exposing ``.dims`` (dimension names) and ``.chunks`` (one
        entry of chunk sizes per dimension, in the same order as ``.dims``);
        presumably an ``xarray.DataArray``.

    Returns
    -------
    dict
        Mapping ``{dimension name: chunk sizes}``. Empty when ``.chunks`` is
        ``None`` (the array is not chunked), a case that previously raised
        ``TypeError``.
    """
    chunks = data_array.chunks
    if chunks is None:
        # Nothing to index into; report it instead of failing on chunks[i].
        print("array is not chunked")
        return {}

    # dims and chunks are parallel sequences, so pair them up directly.
    readable_chunks = dict(zip(data_array.dims, chunks))
    for dim, sizes in readable_chunks.items():
        print(f"{dim} chunks: {sizes}")
    return readable_chunks

def stats_monthclim(ds,var_name,time_dim='time',method_str='cohorts',skipna_flag=False):
"""
currently written for single variable datasets
"""
Expand Down
17 changes: 17 additions & 0 deletions src/scripts/BRAN2020_temp_stats.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash -l

#PBS -P v19
#PBS -q megamem
#PBS -l walltime=48:00:00
#PBS -l ncpus=48
#PBS -l mem=2990GB
#PBS -l jobfs=1400GB
#PBS -l wd
#PBS -l storage=gdata/xv83+gdata/gb6+gdata/v14+gdata/es60+scratch/es60+gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65
#PBS -j oe
#PBS -M thomas.moore@csiro.au
#PBS -m abe

# PBS job script: runs BRAN2020_temp_stats.py on the megamem queue and
# captures stdout+stderr in a per-job log file.
# The relative paths below are resolved from the job's working directory
# ("-l wd" above), so submit this script from the directory that contains it.

conda activate pangeo_bran2020_demo

# The output redirect fails before python even starts if ./logs is missing,
# so make sure the directory exists first.
mkdir -p ./logs

# -u: unbuffered output, so the log is written as the run progresses.
python -u ../BRAN2020_temp_stats.py > "./logs/${PBS_JOBID}-megamem-3D-temp-stats.log" 2>&1

0 comments on commit ac3ca75

Please sign in to comment.