Skip to content

Commit

Permalink
add c flux calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
tykukla committed Nov 12, 2024
1 parent 54102eb commit 0b0b2d4
Show file tree
Hide file tree
Showing 8 changed files with 1,582 additions and 9 deletions.
57 changes: 57 additions & 0 deletions scepter/control-scripts/singlerun_dynamicApp_multiyear.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#! /usr/bin/bash
#
# Launch rock_buff_dust-ts_multiyear.py for one row of a batch-input csv.
#
# Arguments:
#   $1  model directory (contains the run script; output goes to $1/scepter_output/)
#   $2  directory holding the batch input csv
#   $3  file name of the batch input csv
#   $4  1-based data-row index into the csv (header row excluded)
#   $5  path to the default parameter dictionary (see SCEPTER/defaults/dict_singlerun.py)

# name of run script
runscript="$1/rock_buff_dust-ts_multiyear.py"
# model directory
modeldir="$1"
# output directory
outdir="$modeldir/scepter_output/"
# default dictionary
default_dict="$5" # see SCEPTER/defaults/dict_singlerun.py


# --- COLLECT INPUTS ---
# input csv
input_dir=$2
input_name=$3
input_index=$4
((input_index++)) # add one to skip column names

# check if directory exists
if [ ! -d "$input_dir" ]; then
    echo "Error: Directory '$input_dir' does not exist."
    exit 1
fi
# check if file exists
file_path="$input_dir/$input_name"
if [ ! -f "$file_path" ]; then
    echo "Error: File '$file_path' does not exist."
    exit 1
fi
# read data from the specified row number
row_data=$(sed "${input_index}q;d" "$file_path")
# FIX: original echoed the undefined variable $row_number
echo "Data from row $input_index: $row_data"

# read the header row to get column names
header_row=$(head -n 1 "$file_path")
# FIX: split strictly on commas (the original IFS=$' , ' also split on
# spaces, which would corrupt any field containing a space); strip any
# trailing carriage return in case the csv has CRLF line endings
IFS=',' read -r -a columns <<< "${header_row//$'\r'/}"
IFS=',' read -r -a values <<< "${row_data//$'\r'/}"
# map column name -> value for this row
declare -A params
for ((i=0; i<${#columns[@]}; i++)); do
    params["${columns[i]}"]="${values[i]}"
done

# build the Python command: one --key value flag per csv column
python_cmd="python3 $runscript --modeldir $modeldir --outdir $outdir --default_dict $default_dict"
for key in "${!params[@]}"; do
    python_cmd+=" --$key ${params[$key]}"
done

# Run the Python script
echo "running python script with command:"
echo "$python_cmd" # troubleshoot
eval "$python_cmd"
5 changes: 3 additions & 2 deletions scepter/run-multiple.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
# batch_base="batch-tph-liming_noFert-multiyear-base.yaml"
# batch_base="batch-liming_fixedRate-base.yaml"
# batch_base="batch-basalt_fixedRate-base.yaml"
batch_base="batch-meanAnnliming_fert_fixedRate-base.yaml"
batch_base="batch-meanAnnliming_fert_dust-multiyear-base.yaml"
# batch_base="batch-meanAnnliming_fert_fixedRate-base.yaml"
# batch_base="batch-tph-multiyear-base.yaml"
# batch_base="batch-meanAnnliming_fixedRate-base.yaml"
# batch_base="batch-meanAnnbasalt_fixedRate-base.yaml"
Expand Down Expand Up @@ -35,5 +36,5 @@ do
echo "Running ${i}"
argo submit scepter-workflow.yaml --parameter-file $paramfile -p batch-index="${i}"
# [TROUBLESHOOT]
sleep 1m # take a break between submitted jobs :)
sleep 15s # take a break between submitted jobs :)
done
84 changes: 84 additions & 0 deletions scepter/setup/batch-setup/make-dustflx-csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# %%
# -------------------------------------------------------
#
# Script to make a csv for dust flux data over time
#
# Note that as of 10/20/2024 it only works with the
# SCEPTER/rock_buff_dust-ts_multiyear.py script
#
# -------------------------------------------------------
import os
import numpy as np
import pandas as pd

# %%
# --- set up the save params
# ***************************************************************
savehere = "/home/tykukla/aglime-swap-cdr/scepter/dust-inputs"
savename = "cc_15yr_1app_no2nd_001.csv"
# ***************************************************************

# %%
# --- DEFINE TIME STEPS
# [1] the total amount of time to split up into sub-runs
max_time = 15  # [years] the end of the batch simulation
# [2] list of the years where a new sub-run starts (values
#     cannot exceed max_time)
start_times = [0, 1]  # [years]

# --- DEFINE DUST APPLICATION
# (note, lists must have same length as start_times)
# [1] define the dust species applied at each timestep
dustsp = ['cc', 'cc']
# [2] define the dust rates -- note these will override
#     the default value from the default dict (or from
#     the batch .csv) unless the entry is non-numeric
#     (suggest to make those values 'defer' so it's clear
#     we're deferring to the default)
dustrate = ['defer', 0]
# [3] define dust radius
#     as above, 'defer' or other non-numeric means the default
#     entry will be selected
dustrad = ['defer', 'defer']
# [4] define second dust species
dustsp_2nd = []  # leaving it empty means we use the default (though saying 'defer' should work too)
dustrate_2nd = []  # leaving it empty means we use the default (though saying 'defer' should work too)

# %%
# --- sanity checks: fail fast with a clear message rather than a
#     confusing pandas length error (or silently-negative durations)
if any(t > max_time for t in start_times):
    raise ValueError("start_times entries cannot exceed max_time")
if start_times != sorted(start_times):
    raise ValueError("start_times must be in increasing order")
for _name, _lst in [("dustsp", dustsp), ("dustrate", dustrate), ("dustrad", dustrad)]:
    if len(_lst) != len(start_times):
        raise ValueError(f"{_name} must have the same length as start_times")

# %%
# --- calculate timestep durations
# each timestep lasts until the next start time; the final one
# runs out to max_time
timestep_dur = [
    end - start
    for start, end in zip(start_times, start_times[1:] + [max_time])
]

# %%
# --- BRING TOGETHER
# [1] bring lists into a dictionary
list_dict = {
    "yr_start": start_times,
    "duration": timestep_dur,
    "dustsp": dustsp,
    "dustrate": dustrate,
    "dustrad": dustrad,
    "dustsp_2nd": dustsp_2nd,
    "dustrate_2nd": dustrate_2nd
}
# [2] remove empty lists from the dictionary (empty means "use defaults")
filtered_data = {key: value for key, value in list_dict.items() if value}
# [3] create pd.DataFrame
df = pd.DataFrame(filtered_data)
df
# %%
# --- save result
df.to_csv(os.path.join(savehere, savename), index=False)

# %%
69 changes: 69 additions & 0 deletions scepter/setup/batch-setup/make_batch_input_grainsize+apprate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# %%
# ---------------------------------------------------
#
# Generate batch input .csv files for SCEPTER run
#
# provide var vectors and assume we want every
# combination of them, or by site
#
# T Kukla (CarbonPlan, 2024)
#
# ---------------------------------------------------
import os
import numpy as np
import pandas as pd
import itertools
import batch_helperFxns as bhf

# %%
# --- USER INPUTS
# [1] vars held constant across every run
fertLevel = "hi"  # label for how much fertilizer is applied
dustsp = "gbas"   # dust species to apply (must be from accepted list)
pref = f"{fertLevel}Fert_{dustsp}"
# year-span (e.g., 1950-2020) for the climate input when climate
# files are used; set to None when they are not
clim_tag = None
# save vars
file_prefix = f"meanAnn_{dustsp}_shortRun_{fertLevel}Fert_gs+apprate"  # prefix of output run names
fn = f"{file_prefix}_v0.csv"
savepath_batch = "/home/tykukla/aglime-swap-cdr/scepter/batch-inputs"
multi_run_split = False  # whether to split the csv into multiple files
max_iters_per_set = 20   # [int] runs per csv (only used when multi_run_split is True)

const_dict = dict(
    duration=15,  # duration of run (starts from earliest year)
    dustsp=dustsp,
    dustsp_2nd="amnt",
    dustrate_2nd=30.0,
    add_secondary=False,
    imix=1,
    singlerun_seasonality=False,
    include_psd_full=False,
    include_psd_bulk=False,
    climatedir="NA",
)

# %%
# [2] vars that vary by site
sites = ['site_311a', 'site_311b']
by_site = dict(  # values must follow the same order as the 'sites' var
    cec=[21.10329, 6.96125],
    spinrun=["site_311a_pr9_spintuneup4", "site_311b_pr9_spintuneup4"],
    # these serve as the site name when there is no climate file to use
    climatefiles=["site_311a", "site_311b"],
)

# %%
# [3] vars that vary within a site (every combination of the two is generated)
dustrate_ton_ha_yr = [0.3, 0.6, 1, 2, 5, 7, 10, 15, 25, 35, 45, 60, 100]
all_combinations = dict(
    dustrate=[100 * rate for rate in dustrate_ton_ha_yr],  # [ton ha-1 yr-1 * 100 = g m-2]
    # [diameter, microns] i think this gets applied to gbas and amnt equally
    # (though amnt is fast-reacting so maybe not a big deal?)
    dustrad=[1, 10, 30, 50, 75, 100, 125, 150, 200],
)


# %%
# --- BUILD DATAFRAME AND SAVE
df = bhf.build_df(pref, const_dict, sites, by_site, all_combinations, add_ctrl=True)
# save
bhf.save_df(df, savepath_batch, fn, multi_run_split, max_iters_per_set)
# %%
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# %%
# ---------------------------------------------------
#
# Generate batch input .csv files for SCEPTER run
# using the rock_buff_dust_ts-multiyear.py script
#
# provide var vectors and assume we want every
# combination of them, or by site
#
# T Kukla (CarbonPlan, 2024)
#
# ---------------------------------------------------
import os
import numpy as np
import pandas as pd
import itertools
import batch_helperFxns as bhf

# %%
# --- USER INPUTS
# [1] vars held constant across every run
fertLevel = "no"  # label for how much fertilizer is applied
dustsp = "cc"     # dust species to apply (must be from accepted list)
pref = f"{fertLevel}Fert_{dustsp}_multiyear"
# year-span (e.g., 1950-2020) for the climate input when climate
# files are used; set to None when they are not
clim_tag = None
# save vars
file_prefix = f"meanAnn_{dustsp}_shortRun_dustMultiyr_{fertLevel}Fert_gs+apprate"  # prefix of output run names
fn = f"{file_prefix}_v0.csv"
savepath_batch = "/home/tykukla/aglime-swap-cdr/scepter/batch-inputs"
multi_run_split = False  # whether to split the csv into multiple files
max_iters_per_set = 20   # [int] runs per csv (only used when multi_run_split is True)

const_dict = dict(
    duration=15,  # duration of run (starts from earliest year)
    # -- dust timeseries inputs
    dust_ts_dir="/home/tykukla/aglime-swap-cdr/scepter/dust-inputs",
    dust_ts_fn=f"{dustsp}_15yr_1app_no2nd_001.csv",
    # --
    dustsp=dustsp,
    dustsp_2nd="amnt",
    dustrate_2nd=0,
    add_secondary=False,
    imix=1,
    singlerun_seasonality=False,
    include_psd_full=False,
    include_psd_bulk=False,
    climatedir="NA",
)

# %%
# [2] vars that vary by site
sites = ['site_311a', 'site_311b']
by_site = dict(  # values must follow the same order as the 'sites' var
    cec=[21.10329, 6.96125],
    spinrun=["site_311a_pr9_spintuneup4", "site_311b_pr9_spintuneup4"],
    # these serve as the site name when there is no climate file to use
    climatefiles=["site_311a", "site_311b"],
)

# %%
# [3] vars that vary within a site (every combination of the two is generated)
dustrate_ton_ha_yr = [0.3, 0.6, 1, 2, 5, 7, 10, 15, 25, 35, 45, 60, 100]
all_combinations = dict(
    dustrate=[100 * rate for rate in dustrate_ton_ha_yr],  # [ton ha-1 yr-1 * 100 = g m-2]
    # [diameter, microns] i think this gets applied to gbas and amnt equally
    # (though amnt is fast-reacting so maybe not a big deal?)
    dustrad=[1, 10, 30, 50, 75, 100, 125, 150, 200],
)


# %%
# --- BUILD DATAFRAME AND SAVE
df = bhf.build_df(pref, const_dict, sites, by_site, all_combinations, add_ctrl=True)
df
# %%
# save
bhf.save_df(df, savepath_batch, fn, multi_run_split, max_iters_per_set)
# %%
24 changes: 18 additions & 6 deletions scepter/setup/build_composite_multiyear.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,13 @@ def build_composite(basename, outdir):
# ... first get all the outdir paths
# (these will be passed to the function in the main script!)
# outdir = "/home/tykukla/SCEPTER/scepter_output"
# -- we're having an issue where a basename like
# mydir_10 returns "mydir_10_startyear*" but it also
# returns "mydir_100_startyear*". So we add an under-
# score to prevent this but only for finding alldirs
runname_base_underscore = basename + "_"
runname_base = basename
alldirs = sorted(find_subdirs_1level(outdir, runname_base))
alldirs = sorted(find_subdirs_1level(outdir, runname_base_underscore))

# remove paths with the word "composite"
alldirs = [path for path in alldirs if "composite" not in os.path.basename(path)]
Expand Down Expand Up @@ -199,16 +204,23 @@ def build_composite(basename, outdir):
# check if file exists
savedst_tmp = os.path.join(dst_main_flx, fn)
filecheck = os.path.isfile(savedst_tmp)
if filecheck: # then exclude the header, append the rest
dfsrc.to_csv(
savedst_tmp, header=None, index=None, sep="\t", mode="a"
) # mode = "a" will append to end of file if exists
if filecheck: # then read in the existing df and append the new one
# read the existing file into a dataframe
existing_df = preprocess_txt(savedst_tmp)
# join the existing and source dfs together by column
# (join='outer' means a column that only exists in df2 will be kept, with nans in df1)
# and axis=0 means it's concatenated row-wise, not column-wise)
new_df = pd.concat([existing_df, dfsrc], axis=0, join='outer')
new_df.to_csv( # default is mode='w' which will overwrite the existing file (that's fine because we've merged it with the new data)
savedst_tmp, index=None, sep="\t"
)
else: # if the file doesn't exist, save and include the header
dfsrc.to_csv(
savedst_tmp, index=None, sep="\t", mode="a"
) # mode = "a" will append to end of file if exists
# ----------------------------------------------------------

# return the new output dirs
return dst_main_field, dst_main_lab

# # save result or append if one exists
# if filecheck:
Expand Down
Loading

0 comments on commit 0b0b2d4

Please sign in to comment.