From 0b0b2d42a88868a457270fc6bf1c7b4e930fa5a6 Mon Sep 17 00:00:00 2001
From: tykukla <tkukla13@gmail.com>
Date: Mon, 11 Nov 2024 22:32:05 -0600
Subject: [PATCH] add c flux calculations

---
 .../singlerun_dynamicApp_multiyear.sh         |   57 +
 scepter/run-multiple.sh                       |    5 +-
 scepter/setup/batch-setup/make-dustflx-csv.py |   84 ++
 .../make_batch_input_grainsize+apprate.py     |   69 +
 ...batch_input_grainsize+apprate_multiyear.py |   76 ++
 scepter/setup/build_composite_multiyear.py    |   24 +-
 scepter/setup/cflx_proc.py                    | 1195 +++++++++++++++++
 scepter/setup/scepter_helperFxns.py           |   81 +-
 8 files changed, 1582 insertions(+), 9 deletions(-)
 create mode 100755 scepter/control-scripts/singlerun_dynamicApp_multiyear.sh
 create mode 100644 scepter/setup/batch-setup/make-dustflx-csv.py
 create mode 100644 scepter/setup/batch-setup/make_batch_input_grainsize+apprate.py
 create mode 100644 scepter/setup/batch-setup/make_batch_input_grainsize+apprate_multiyear.py
 create mode 100644 scepter/setup/cflx_proc.py

diff --git a/scepter/control-scripts/singlerun_dynamicApp_multiyear.sh b/scepter/control-scripts/singlerun_dynamicApp_multiyear.sh
new file mode 100755
index 0000000..ef21079
--- /dev/null
+++ b/scepter/control-scripts/singlerun_dynamicApp_multiyear.sh
@@ -0,0 +1,57 @@
+#! /usr/bin/bash
+
+# name of run script
+runscript="$1/rock_buff_dust-ts_multiyear.py"
+# model directory
+modeldir="$1"
+# output directory
+outdir="$modeldir/scepter_output/"
+# default dictionary
+default_dict="$5"  # see SCEPTER/defaults/dict_singlerun.py
+
+
+# --- COLLECT INPUTS ---
+# input csv
+input_dir=$2
+input_name=$3
+input_index=$4
+((input_index++))  # add one to skip column names
+
+# check if directory exists
+if [ ! -d "$input_dir" ]; then
+    echo "Error: Directory '$input_dir' does not exist."
+    exit 1
+fi
+# check if file exists
+file_path="$input_dir/$input_name"
+if [ ! -f "$file_path" ]; then
+    echo "Error: File '$file_path' does not exist."
+    exit 1
+fi
+# read data from the specified row number
+row_data=$(sed "${input_index}q;d" "$file_path")
+echo "Data from row $row_number: $row_data"
+
+# read the header row to get column names
+header_row=$(head -n 1 "$file_path")
+IFS=$' , ' read -r -a columns <<< "$header_row"
+# assign values to variables using column names
+IFS=$' , ' read -r -a values <<< "$row_data"
+declare -A params
+for ((i=0; i<${#columns[@]}; i++)); do
+    column_name="${columns[i]}"
+    value="${values[i]}"
+    declare "$column_name"="$value"
+    params["$column_name"]="$value"
+done
+
+# build the Python command
+python_cmd="python3 $runscript --modeldir $modeldir --outdir $outdir --default_dict $default_dict"
+for key in "${!params[@]}"; do
+    python_cmd+=" --$key ${params[$key]}"
+done
+
+# Run the Python script
+echo "running python script with command:"
+echo "$python_cmd" # troubleshoot
+eval "$python_cmd"
diff --git a/scepter/run-multiple.sh b/scepter/run-multiple.sh
index 10ec4cd..c019692 100755
--- a/scepter/run-multiple.sh
+++ b/scepter/run-multiple.sh
@@ -7,7 +7,8 @@
 # batch_base="batch-tph-liming_noFert-multiyear-base.yaml"
 # batch_base="batch-liming_fixedRate-base.yaml"
 # batch_base="batch-basalt_fixedRate-base.yaml"
-batch_base="batch-meanAnnliming_fert_fixedRate-base.yaml"
+batch_base="batch-meanAnnliming_fert_dust-multiyear-base.yaml"
+# batch_base="batch-meanAnnliming_fert_fixedRate-base.yaml"
 # batch_base="batch-tph-multiyear-base.yaml"
 # batch_base="batch-meanAnnliming_fixedRate-base.yaml"
 # batch_base="batch-meanAnnbasalt_fixedRate-base.yaml"
@@ -35,5 +36,5 @@ do
     echo "Running ${i}"
     argo submit scepter-workflow.yaml --parameter-file $paramfile -p batch-index="${i}"
     # [TROUBLESHOOT]
-    sleep 1m  # take a break between submitted jobs :)
+    sleep 15s  # take a break between submitted jobs :)
 done
diff --git a/scepter/setup/batch-setup/make-dustflx-csv.py b/scepter/setup/batch-setup/make-dustflx-csv.py
new file mode 100644
index 0000000..52ac527
--- /dev/null
+++ b/scepter/setup/batch-setup/make-dustflx-csv.py
@@ -0,0 +1,84 @@
+# %%
+# -------------------------------------------------------
+# 
+# Script to make a csv for dust flux data over time 
+# 
+# Note that as of 10/20/2024 it only works with the 
+# SCEPTER/rock_buff_dust-ts_multiyear.py script
+# 
+# -------------------------------------------------------
+import os
+import numpy as np
+import pandas as pd
+
+# %%
+# --- set up the save params
+# ***************************************************************
+savehere = "/home/tykukla/aglime-swap-cdr/scepter/dust-inputs"
+savename = "cc_15yr_1app_no2nd_001.csv"
+# ***************************************************************
+
+# %% 
+# --- DEFINE TIME STEPS
+# [1] the total amount of time to split up into sub-runs
+max_time = 15 # [years] the end of the batch simulation
+# [2] list of the years where a new sub-run starts (values
+#     cannot exceed max_time)
+start_times = [0, 1]   # [years]
+
+# --- DEFINE DUST APPLICATION
+# (note, lists must have same length as start_times)
+# [1] define the dust species applied at each timestep 
+dustsp = ['cc', 'cc']
+# [2] define the dust rates -- note these will override 
+#     the default value from the default dict (or from 
+#     the batch .csv) unless the entry is non-numeric (
+#     (suggest to make those values 'defer' so it's clear
+#      we're deferring to the default)
+dustrate = ['defer', 0]
+# [3] define dust radius 
+#     as above, 'defer' or other non-numeric means the default
+#     entry will be selected
+dustrad = ['defer', 'defer']
+# [4] define second dust species
+dustsp_2nd = []   # leaving it empty means we use the default (though saying 'defer' should work too)
+dustrate_2nd = [] # leaving it empty means we use the defualt (though saying 'defer' should work too)
+
+
+# %% 
+# --- calculate timestep durations 
+timestep_dur = []
+# Loop through the start_times and calculate the difference
+for i in range(len(start_times)):
+    if i < len(start_times) - 1:
+        # calculate time difference to the next start_time
+        interval = start_times[i+1] - start_times[i]
+    else:
+        # calculate time difference to max_time for the last start_time
+        interval = max_time - start_times[i]
+    
+    # Append the interval to the list
+    timestep_dur.append(interval)
+
+# %% 
+# --- BRING TOGETHER 
+# [1] bring lists into a dictionary
+list_dict = {
+    "yr_start": start_times,
+    "duration": timestep_dur,
+    "dustsp": dustsp,
+    "dustrate": dustrate,
+    "dustrad": dustrad,
+    "dustsp_2nd": dustsp_2nd,
+    "dustrate_2nd": dustrate_2nd
+}
+# [2] remove empty lists from the dictionary 
+filtered_data = {key: value for key, value in list_dict.items() if value}
+# [3] create pd.Dataframe
+df = pd.DataFrame(filtered_data)
+df
+# %%
+# --- save result
+df.to_csv(os.path.join(savehere, savename), index=False)
+
+# %%
diff --git a/scepter/setup/batch-setup/make_batch_input_grainsize+apprate.py b/scepter/setup/batch-setup/make_batch_input_grainsize+apprate.py
new file mode 100644
index 0000000..014028e
--- /dev/null
+++ b/scepter/setup/batch-setup/make_batch_input_grainsize+apprate.py
@@ -0,0 +1,69 @@
+# %%
+# ---------------------------------------------------
+# 
+# Generate batch input .csv files for SCEPTER run
+# 
+# provide var vectors and assume we want every 
+# combination of them, or by site
+# 
+# T Kukla (CarbonPlan, 2024)
+# 
+# ---------------------------------------------------
+import os
+import numpy as np
+import pandas as pd
+import itertools
+import batch_helperFxns as bhf
+
+# %% 
+# --- USER INPUTS
+# [1] vars to update, constant for all runs
+fertLevel = "hi"    # name for how much fertilizer is applied
+dustsp = "gbas"      # name for dust species to apply (must be from accepted list)
+pref = f"{fertLevel}Fert_{dustsp}"
+clim_tag = None   # [string] year-span (e.g., 1950-2020) for clim input if climate files are used
+                  # (if clim files are not used, set to None)
+# save vars
+file_prefix = f"meanAnn_{dustsp}_shortRun_{fertLevel}Fert_gs+apprate"  # prefix of output run names
+fn = file_prefix + "_v0.csv"
+savepath_batch = "/home/tykukla/aglime-swap-cdr/scepter/batch-inputs"
+multi_run_split = False   # whether to split the csv into multiple files
+max_iters_per_set = 20    # [int] the number of runs per csv (only used if multi_run_split is True)
+
+const_dict = {
+    "duration": 15,  # duration of run (starts from earliest year)
+    "dustsp": dustsp,
+    "dustsp_2nd": "amnt",
+    "dustrate_2nd": 30.0,
+    "add_secondary": False,
+    "imix": 1,
+    "singlerun_seasonality": False,
+    "include_psd_full": False,
+    "include_psd_bulk": False,
+    "climatedir": "NA"
+}
+
+# %% 
+# [2] vars to vary by site
+sites = ['site_311a', 'site_311b']
+by_site = {   # values must have same order as 'sites' var
+    "cec": [21.10329, 6.96125],
+    "spinrun": ["site_311a_pr9_spintuneup4", "site_311b_pr9_spintuneup4"],
+    "climatefiles": ["site_311a", "site_311b"]  # these serve as the site name when there is no cliamte file to use
+}
+
+# %% 
+# [3] vars to vary within site (we'll get every combination of these two)
+dustrate_ton_ha_yr = [0.3, 0.6, 1, 2, 5, 7, 10, 15, 25, 35, 45, 60, 100]
+all_combinations = {
+    "dustrate": [x * 100 for x in dustrate_ton_ha_yr],  # [ton ha-1 yr-1 * 100 = g m-2]   
+    "dustrad": [1, 10, 30, 50, 75, 100, 125, 150, 200]  # [diameter, microns] i think this gets applied to gbas and amnt equally (though amnt is fast-reacting so maybe not a big deal? )
+}
+
+
+# %% 
+# --- BUILD DATAFRAME AND SAVE
+df = bhf.build_df(pref, const_dict, sites, by_site, all_combinations, add_ctrl=True)
+# save 
+bhf.save_df(df, savepath_batch, fn, multi_run_split, max_iters_per_set)
+# %%
diff --git a/scepter/setup/batch-setup/make_batch_input_grainsize+apprate_multiyear.py b/scepter/setup/batch-setup/make_batch_input_grainsize+apprate_multiyear.py
new file mode 100644
index 0000000..ec98c2f
--- /dev/null
+++ b/scepter/setup/batch-setup/make_batch_input_grainsize+apprate_multiyear.py
@@ -0,0 +1,76 @@
+# %%
+# ---------------------------------------------------
+# 
+# Generate batch input .csv files for SCEPTER run
+# using the rock_buff_dust_ts-multiyear.py script
+# 
+# provide var vectors and assume we want every 
+# combination of them, or by site
+# 
+# T Kukla (CarbonPlan, 2024)
+# 
+# ---------------------------------------------------
+import os
+import numpy as np
+import pandas as pd
+import itertools
+import batch_helperFxns as bhf
+
+# %% 
+# --- USER INPUTS
+# [1] vars to update, constant for all runs
+fertLevel = "no"    # name for how much fertilizer is applied
+dustsp = "cc"      # name for dust species to apply (must be from accepted list)
+pref = f"{fertLevel}Fert_{dustsp}_multiyear"
+clim_tag = None   # [string] year-span (e.g., 1950-2020) for clim input if climate files are used
+                  # (if clim files are not used, set to None)
+# save vars
+file_prefix = f"meanAnn_{dustsp}_shortRun_dustMultiyr_{fertLevel}Fert_gs+apprate"  # prefix of output run names
+fn = file_prefix + "_v0.csv"
+savepath_batch = "/home/tykukla/aglime-swap-cdr/scepter/batch-inputs"
+multi_run_split = False   # whether to split the csv into multiple files
+max_iters_per_set = 20    # [int] the number of runs per csv (only used if multi_run_split is True)
+
+const_dict = {
+    "duration": 15,  # duration of run (starts from earliest year)
+    # -- dust timeseries inputs
+    "dust_ts_dir": "/home/tykukla/aglime-swap-cdr/scepter/dust-inputs",
+    "dust_ts_fn": f"{dustsp}_15yr_1app_no2nd_001.csv",
+    # -- 
+    "dustsp": dustsp,
+    "dustsp_2nd": "amnt",
+    "dustrate_2nd": 0,
+    "add_secondary": False,
+    "imix": 1,
+    "singlerun_seasonality": False,
+    "include_psd_full": False,
+    "include_psd_bulk": False,
+    "climatedir": "NA"
+}
+
+# %% 
+# [2] vars to vary by site
+sites = ['site_311a', 'site_311b']
+by_site = {   # values must have same order as 'sites' var
+    "cec": [21.10329, 6.96125],
+    "spinrun": ["site_311a_pr9_spintuneup4", "site_311b_pr9_spintuneup4"],
+    "climatefiles": ["site_311a", "site_311b"]  # these serve as the site name when there is no cliamte file to use
+}
+
+# %% 
+# [3] vars to vary within site (we'll get every combination of these two)
+dustrate_ton_ha_yr = [0.3, 0.6, 1, 2, 5, 7, 10, 15, 25, 35, 45, 60, 100]
+all_combinations = {
+    "dustrate": [x * 100 for x in dustrate_ton_ha_yr],  # [ton ha-1 yr-1 * 100 = g m-2]   
+    "dustrad": [1, 10, 30, 50, 75, 100, 125, 150, 200]  # [diameter, microns] i think this gets applied to gbas and amnt equally (though amnt is fast-reacting so maybe not a big deal? )
+}
+
+
+# %% 
+# --- BUILD DATAFRAME AND SAVE
+df = bhf.build_df(pref, const_dict, sites, by_site, all_combinations, add_ctrl=True)
+df
+# %%
+# save 
+bhf.save_df(df, savepath_batch, fn, multi_run_split, max_iters_per_set)
+# %%
diff --git a/scepter/setup/build_composite_multiyear.py b/scepter/setup/build_composite_multiyear.py
index e67cd2a..9f5da90 100644
--- a/scepter/setup/build_composite_multiyear.py
+++ b/scepter/setup/build_composite_multiyear.py
@@ -66,8 +66,13 @@ def build_composite(basename, outdir):
     # ... first get all the outdir paths
     # (these will be passed to the function in the main script!)
     # outdir = "/home/tykukla/SCEPTER/scepter_output"
+    # -- we're having an issue where a basename like 
+    #    mydir_10 returns "mydir_10_startyear*" but it also 
+    #    returns "mydir_100_startyear*". So we add an under-
+    #    score to prevent this but only for finding alldirs
+    runname_base_underscore = basename + "_"
     runname_base = basename
-    alldirs = sorted(find_subdirs_1level(outdir, runname_base))
+    alldirs = sorted(find_subdirs_1level(outdir, runname_base_underscore))
 
     # remove paths with the word "composite"
     alldirs = [path for path in alldirs if "composite" not in os.path.basename(path)]
@@ -199,16 +204,23 @@ def build_composite(basename, outdir):
             # check if file exists
             savedst_tmp = os.path.join(dst_main_flx, fn)
             filecheck = os.path.isfile(savedst_tmp)
-            if filecheck:  # then exclude the header, append the rest
-                dfsrc.to_csv(
-                    savedst_tmp, header=None, index=None, sep="\t", mode="a"
-                )  # mode = "a" will append to end of file if exists
+            if filecheck:  # then read in the existing df and append the new one 
+                # read the existing file into a dataframe
+                existing_df = preprocess_txt(savedst_tmp)
+                # join the existing and source dfs together by column
+                # (join='outer' means a column that only exists in df2 will be kept, with nans in df1)
+                #  and axis=0 means it's concatenated row-wise, not column-wise)
+                new_df = pd.concat([existing_df, dfsrc], axis=0, join='outer')
+                new_df.to_csv(  # default is mode='w' which will overwrite the existing file (that's fine because we've merged it with the new data)
+                    savedst_tmp, index=None, sep="\t"
+                )  
             else:  # if the file doesn't exist, save and include the header
                 dfsrc.to_csv(
                     savedst_tmp, index=None, sep="\t", mode="a"
                 )  # mode = "a" will append to end of file if exists
         # ----------------------------------------------------------
-
+    # return the new output dirs
+    return dst_main_field, dst_main_lab
 
 # # save result or append if one exists
 #     if filecheck:
diff --git a/scepter/setup/cflx_proc.py b/scepter/setup/cflx_proc.py
new file mode 100644
index 0000000..1a4a5da
--- /dev/null
+++ b/scepter/setup/cflx_proc.py
@@ -0,0 +1,1195 @@
+# ------------------------------------------------
+# 
+# Functions to calculate / synthesize CDR-relevant
+# fluxes for single SCEPTER simulations. 
+# 
+# In order to make the functions usable for a wide 
+# range of SCEPTER run files, fluxes are NOT 
+# compared to a control scenario. Only absolute 
+# fluxes for the single run are considered.
+# 
+# ------------------------------------------------
+# %%
+from scipy.integrate import cumulative_trapezoid
+import numpy as np
+import pandas as pd
+# import xarray as xr
+import pickle
+import math
+import os
+import re
+import glob
+import fnmatch
+
+
+# %% 
+# --- Dict for molar masses of solid species (from Kanzaki et al., 2022; table 1)
+molar_mass_dict = {
+    'amsi': 60.085,
+    'qtz': 60.085,
+    'gb': 78.004,
+    'gt': 88.854,
+    'hm': 159.692,
+    'gps': 172.168,
+    'arg': 100.089,
+    'cc': 100.089,
+    'dlm': 184.403,
+    'ab': 262.225,
+    'kfs': 278.33,
+    'an': 278.311,
+    'fo': 140.694,
+    'fa': 203.778,
+    'en': 100.389,
+    'fer': 131.931,
+    'dp': 216.553,
+    'hb': 248.09,
+    'tm': 812.374,
+    'antp': 780.976,
+    'mscv': 398.311,
+    'plgp': 417.262,
+    'ct': 277.113,
+    'ka': 258.162,
+    'anl': 220.155,
+    'nph': 142.055,
+    'nabd': 367.609,
+    'kbd': 372.978,
+    'cabd': 366.625,
+    'mgbd': 363.996,
+    'ill': 383.90,
+    'g1': 30,
+    'g2': 30,
+    'g3': 30,
+    'amnt': 80.043,  # Kanzaki et al., 2023, table 3
+    # --- see treatment of gbas below (not in table 1 of Kanzaki et al., 2022)
+    'gbas': 120.496
+    # Note: gbas molar mass depends on whether Dmod_bas_cmp is defined in the makefile! 
+    # if the makefile has `CPFLAGS       += -Dmod_basalt_cmp`, then scepter.f90
+    # uses basalt_defines.h by default (see scepter.f90 lines ~81, 82). Otherwise, 
+    # another basalt composition (hard-coded in scepter.f90) is used.  
+    # 
+    # This flag was set in my makefile, so I use the molar mass we compute from 
+    # basalt_defines.h. 
+    # 
+    # --- molar mass is calculated following line ~358 in scepter.f90: 
+    # mwtgbas = (fr_si_gbas*mwtamsi + fr_al_gbas/2*mwtal2o3 + fr_na_gbas/2*mwtna2o
+    #            + fr_k_gbas/2*mwtk2o + fr_ca_gbas*mwtcao + fr_mg_gbas*mwtmgo
+    #            + fr_fe2_gbas*mwtfe2o)
+    # where mwt* is molar mass of species, and fr_*_gbas is the fraction from basalt_defines.h
+    #
+    # si: 1.0 * 60.085                = 60.085
+    # al: 0.4683117231/2 * 101.962    = 23.875
+    # na: 0.08088553318/2 * 61.979    = 2.5066
+    # k:  0.008431137573/2 * 94.195   = 0.3971
+    # ca: 0.2438545566 * 56.079       = 13.675
+    # mg: 0.2721686927 * 40.304       = 10.969
+    # fe2:0.1251095225 * 71.846       = 8.9886
+    # 
+    # SUM --------------------------> = 120.496
+}
+
+
+# %%
+# --- FUNCTION to preprocess .txt files for consistent delimiters
+def preprocess_txt(
+    outdir: str,
+    runname: str,
+    fn: str,
+    run_subdir: str = "flx",
+    map_numeric: bool = True
+    ) -> pd.DataFrame:
+    """
+    Convert SCEPTER output .txt files to pandas DataFrame for further
+    analysis
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    fn : str
+        name of the file in the flux directory to input
+    run_subdir : str
+        name of the subdirectory in the run output directory to find the file (either "prof" or "flux")
+        default is "flx". 
+    map_numeric : bool
+        if True, all columns are mapped to numeric values. This should only be false for dust.txt which has 
+        columns for the dust species
+
+    Returns
+    -------
+    pd.DataFrame
+        same format as the SCEPTER .txt file
+    """
+                    
+    data = []  # Initialize a list to store the processed data
+
+    # Initialize a flag to determine if we are reading the header
+    is_header = True
+
+    # get file path
+    file_path = os.path.join(outdir, runname, run_subdir, fn)
+
+    # Read the file line by line and process the data
+    with open(file_path, 'r') as file:
+        for line in file:
+            line = line.strip()  # Remove leading/trailing whitespace
+            if is_header:
+                # Split the first line into column names
+                column_names = re.split(r'\s+', line)
+                is_header = False
+            else:
+                # Split the other lines into data values
+                values = re.split(r'\s+', line)
+                data.append(values)
+
+    # Create a DataFrame with the processed data and set column names
+    df = pd.DataFrame(data, columns=column_names)
+    # apply pd.to_numeric to all columns using the "map" method
+    if map_numeric:
+        df = df.map(pd.to_numeric)
+    # return
+    return df
+
+def get_data(
+    outdir: str,
+    runname: str,
+    var_fn: str,
+    cdvar: str,
+    run_subdir: str = "flx",
+    get_int: bool = True
+    ) -> pd.DataFrame:
+    """
+    Get the SCEPTER data from defined text files. both timeseries
+    and integrated flux files (e.g., <var_fn>-<cdvar> and int_<var_fn>-<cdvar>)
+    are brought in. Both pandas dataframes are returned.
+    
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    var_fn : str
+        base name of the SCEPTER flx file. Format is "[basename]-[cdvar].txt"
+    cdvar : str
+        name of the variable used for CDR metric. Should align with var_fn such that format is "[basename]-[cdvar].txt"
+    run_subdir : str
+        name of the subdirectory in the run output directory to find the file (either "prof" or "flux")
+        default is "flx".
+    get_int : bool
+        if True, then get the time-integrated version of the file and return that too
+
+    Returns
+    -------
+    pd.DataFrame
+        timeseries of the carbon flux metric
+    pd.DataFrame
+        integrated timeseries of the carbon flux metric
+    """
+    # get the txt file as a pandas dataframe
+    fn = f"{var_fn}-{cdvar}.txt"   # flux timeseries
+    df = preprocess_txt(outdir, runname, fn, run_subdir = run_subdir)
+    if get_int:
+        fn_int = f"int_{fn}"       # integrated flux timeseries
+        dfint = preprocess_txt(outdir, runname, fn_int, run_subdir = run_subdir)
+    # return result
+    if get_int:
+        return df, dfint
+    else:
+        return df, _
+
+# %%
+# ******************************************************************************
+# ----------------------- CDR METRIC FUNCTIONS ---------------------------------
+def co2_flx(
+    outdir: str,
+    runname: str,
+    var_fn: str = "flx_gas",
+    cdvar: str = "pco2",
+    organic_sp_list: list = ["g1", "g2", "g3"],
+    inorganic_sp_list: list = ["arg", "cc", "dlm"],
+    convert_units: bool = True,
+    co2_g_mol: float=44.01, # molar_mass_dict not used bc the input file should be in mol co2 / m2 / yr
+    ) -> pd.DataFrame:
+    """
+    Get the CO2 diffusive and advective flux over time. Uses the *flx_gas* 
+    files and pco2 variable by default. files are in mol/m2/yr and output 
+    is in g/m2/yr if convert_units=False; ton/ha/yr if convert_units=True.
+    
+    You can use *flx_co2sp* files and DIC variable as well but it does 
+    not include aqueous complexation between CO2 species and some cations, 
+    so there can be some difference from the flx_gas results. 
+    
+    int_* fluxes are multiplied by time so the output is the time-integral.
+
+    **************************************************************
+    --- derivation notes ---
+    the flux balance can be written as: adv = -dif - [sources] - tflx
+    where positive values indicate net advection out. 
+    
+    if our only sources are respiration and cc dissolution, we get:
+    adv = -dif - resp - cc - tflx
+
+    so the advection flux without the inorganic contribution would simply be
+    adv_noinorg = -dif - resp - tflx  
+    [or] adv_noinorg = adv + cc
+    **************************************************************
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    var_fn : str
+        base name of the SCEPTER flx file. Format is "[basename]-[cdvar].txt"
+    cdvar : str
+        name of the variable used for CDR metric. Should align with var_fn such that format is "[basename]-[cdvar].txt"
+    organic_list : dict
+        dict where keys are the IDs for all possible organic species (required for 
+        resp calculation) and values are molar masses in g/mol 
+        (required to go from mol/m2/yr to mass/m2/yr) (for keys and values, see Kanzaki 
+        et al., 2022; table 1)
+    c_mineral_list : dict
+        dict where keys are the IDs for all carbon-bearing minerals 
+        and values are their molar masses in g/mol (required to go 
+        from mol/m2/yr to mass/m2/yr) (for keys and values, see Kanzaki 
+        et al., 2022; table 1)
+    convert_units : bool
+        if True, convert mol/m2/yr to ton/ha/yr, if False convert mol/m2/yr to g/m2/yr
+    co2_g_mol : float
+        molar mass of CO2 [g mol-1] used to convert mol/m2/yr to g/m2/yr
+
+    Returns
+    -------
+    pd.DataFrame
+        timeseries of the carbon flux metric
+    """
+    # *****************************************
+    # define unit conversion constants
+    # for g/m2/yr to ton/ha/yr
+    ton_g = 1 / 1e6   # [ton g-1]
+    m2_ha = 10e3      # [m2 ha-1]
+    conv_factor = ton_g * m2_ha 
+    # *****************************************
+
+    # get the txt file as a pandas dataframe 
+    df, dfint = get_data(outdir, runname, var_fn, cdvar)
+
+    # find how many organic species are present
+    org_sp = list(set(organic_sp_list) & set(df.columns))
+    org_sp_int = list(set(organic_sp_list) & set(dfint.columns))
+    # find out how many carbon-bearing minerals are present
+    inorg_sp = list(set(inorganic_sp_list) & set(df.columns))
+    inorg_sp_int = list(set(inorganic_sp_list) & set(dfint.columns))
+
+    # pull out the relevant components
+    tdf = df.loc[:, ['time', 'dif', 'tflx', 'adv']].rename(columns={'time': 'time', 
+                                                        'dif': 'co2flx_dif', 'tflx': 'co2flx_tflx',
+                                                        'adv': 'co2flx_adv'})
+    tdfint = dfint.loc[:, ['time', 'dif', 'tflx', 'adv']].rename(columns={'time': 'time', 
+                                                        'dif': 'co2flx_dif', 'tflx': 'co2flx_tflx',
+                                                        'adv': 'co2flx_adv'})
+
+    
+    # --- organic and inorganic contributions
+    # compute resp component and add to tdf(int)
+    if len(org_sp) > 0:  # if no org species are present, skip
+        for sp in org_sp:
+            tdf[sp] = df[sp]
+        tdf['co2flx_resp'] = df[org_sp].sum(axis=1)  # add inorg fluxes together
+    if len(org_sp_int) > 0:  # if no org species are present, skip
+        for sp in org_sp_int:
+            tdfint[sp] = dfint[sp]
+        tdfint['co2flx_resp'] = dfint[org_sp_int].sum(axis=1)  # add inorg fluxes together
+        
+    # compute inorg component and add to tdf(int)
+    if len(inorg_sp) > 0:  # if no org species are present, skip
+        for sp in inorg_sp:
+            tdf[sp] = df[sp]
+        tdf['co2flx_inorg'] = df[inorg_sp].sum(axis=1)  # add inorg fluxes together
+        tdf['co2flx_adv_noinorg'] = tdf['co2flx_adv'] + tdf['co2flx_inorg']
+    if len(inorg_sp_int) > 0:  # if no org species are present, skip
+        for sp in inorg_sp_int:
+            tdfint[sp] = dfint[sp]
+        tdfint['co2flx_inorg'] = dfint[inorg_sp_int].sum(axis=1)  # add inorg fluxes together
+        tdfint['co2flx_adv_noinorg'] = tdfint['co2flx_adv'] + tdfint['co2flx_inorg']
+    # --- convert units 
+    if convert_units:  # convert mol/m2/yr to ton/ha/yr
+        tdfint = tdfint.apply(lambda x: x * co2_g_mol * conv_factor if x.name not in ['time', 'units'] else x)
+        tdf = tdf.apply(lambda x: x * co2_g_mol * conv_factor if x.name not in ['time', 'units'] else x)
+        # add units columns
+        tdfint['units'] = 'ton ha-1'
+        tdf['units'] = 'ton ha-1 yr-1'
+    else: # convert mol/m2/yr to g/m2/yr
+        tdfint = tdfint.apply(lambda x: x * co2_g_mol if x.name not in ['time', 'units'] else x)
+        tdf = tdf.apply(lambda x: x * co2_g_mol if x.name not in ['time', 'units'] else x)
+        # add units columns
+        tdfint['units'] = 'g m-2'
+        tdf['units'] = 'g m-2 yr-1'
+    
+    # multiply tdfint columns by time (required to output time-integrated fluxes)
+    tdfint = tdfint.apply(lambda x: x * tdfint['time'] if x.name not in ['time', 'units'] else x)
+
+    
+    # --- tidy up output
+    tdfint['flx_type'] = "int_flx"
+    tdf['flx_type'] = "flx"
+    tdfint['runname'] = tdf['runname'] = runname
+    tdfint['var'] = tdf['var'] = cdvar
+    
+    # combine 
+    outdf = pd.concat([tdf, tdfint], axis=0, ignore_index=True)
+    return outdf
+
+
+def sld_flx(
+    outdir: str,
+    runname: str,
+    feedstock: str,
+    var_fn: str = "flx_sld",
+    dust_from_file: bool = True,
+    molar_mass_dict: dict = molar_mass_dict
+    ) -> pd.DataFrame:
+    """
+    Get the feedstock fluxes. Uses the *flx_sld* files for the feedstock. Only
+    time-integrated fluxes are returned. integrated dust application can be 
+    computed from the dust.txt file if "dust_from_file"=True (this is 
+    required for re-application runs which are composites of multiple 
+    individual runs)
+
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    feedstock : str
+        name of the feedstock of rock applied to the system. Format is "[basename]-[feedstock].txt"
+    var_fn : str
+        base name of the SCEPTER flx file. Format is "[basename]-[cdvar].txt"
+    dust_from_file : bool
+        if True, then compute integrated dust fluxes from the dust.txt file. Should be 
+        set to True for re-application runs since the int_sld* files are patched 
+        together from multiple 1-year runs, so they don't reflect the true time integral
+        (just the timeSTEP integral). This ensures that the integral is correct
+    molar_mass_dict : dict
+        dictionary where keys are species IDs and values are molar masses in [g mol-1]
+        taken from Kanzaki et al., 2022 GMD; Table 1. 
+
+    Returns
+    -------
+    pd.DataFrame
+        timeseries of the carbon flux metric
+    """
+    # *****************************************
+    # define unit conversion constants
+    # for g/m2/yr to ton/ha/yr
+    ton_g = 1 / 1e6   # [ton g-1]
+    m2_ha = 10e3      # [m2 ha-1]
+    conv_factor = ton_g * m2_ha 
+    # *****************************************
+
+    # get the txt file as a pandas dataframe 
+    df, dfint = get_data(outdir, runname, var_fn, cdvar=feedstock)
+
+    # multiply tdfint columns by time (required to output time-integrated fluxes)
+    dfint = dfint.apply(lambda x: x * dfint['time'] if x.name != 'time' else x)
+    
+    # add dust if needed
+    # [note] no unit conversion from mol/m2/yr to g/m2/yr is needed if 
+    # dust_from_file because that is calculated based on application in g/m2/yr 
+    if dust_from_file: 
+        dfdust0 = preprocess_txt(outdir, runname, fn='dust.txt',
+                                run_subdir = "flx", map_numeric = False)
+        # map numeric columns to numerics
+        dfnum = dfdust0.drop(columns=['dustsp1', 'dustsp2']).map(pd.to_numeric)
+        # add other columns back
+        dfdust = dfnum.copy()
+        dfdust['dustsp1'] = dfdust0['dustsp1'].copy() 
+        dfdust['dustsp2'] = dfdust0['dustsp2'].copy() 
+        
+        # --- identify the feedstock index
+        columns_with_fs = [col for col in dfdust.columns if (dfdust[col] == feedstock).any()]
+        if len(columns_with_fs) > 0:
+            fscol = columns_with_fs[0] # should only return one column, so we take the 0 index
+        else:
+            fscol = "not found"
+        if fscol == "dustsp1":
+            dust_dx = '1'
+        elif fscol == "dustsp2":
+            dust_dx = '2'
+        else:    # assume it's the first index if it doesn't exist
+            dust_dx = '1'
+        # re-calculate integral because right now it's integrated by timestep, not
+        # by the entire run itself
+        dfdust['int_dust_g_m2_yr'] = cumulative_trapezoid(dfdust[f'dust{dust_dx}_g_m2_yr'], dfdust['time'], initial=0)
+        # add dust data 
+        if len(dfdust) != len(df): # if mis-matched, then match them
+            # drop duplicates in the 'time' column, keeping first occurrence
+            dfdust_nodup = dfdust.drop_duplicates(subset='time', keep='first')
+            # then interpolate the data 
+            # merge the DataFrames on 'time' using outer join to keep all time points
+            df_merged = pd.merge(df, dfdust_nodup, on='time', how='outer', suffixes=('', '_orig'))
+            # interpolate the columns of interest 
+            df_merged['int_dust_g_m2_yr'] = df_merged['int_dust_g_m2_yr'].interpolate()
+            # keep only the points in the df time steps
+            intdust = df_merged[df_merged['time'].isin(df['time'])]['int_dust_g_m2_yr'].values / 100 # divide by 100 to convert g/m2/yr to ton/ha/yr
+        else:
+            intdust = dfdust['int_dust_g_m2_yr'].values / 100 # divide by 100 to convert g/m2/yr to ton/ha/yr
+        # add integrated dust
+        dfint['int_dust_ton_ha_yr'] = intdust
+    else:
+        # use rain dust (already multiplied by time, so it's time-integrated)
+        # multiply by -1 to get positive values into the soil column
+        # multiply by molar_mass_dict[feedstock] to get mol/m2/yr in g/m2/yr
+        # multiply by conv_factor go get g/m2/yr to ton/ha/yr
+        dfint['int_dust_ton_ha_yr'] = -1 * dfint['rain'] * molar_mass_dict[feedstock] * conv_factor # (note, we multiplied rain by time earlier so we don't have to do it here)
+
+    # convert other variables to ton/ha/yr 
+    dfint['adv'] = dfint['adv'] * molar_mass_dict[feedstock] * conv_factor
+    dfint[feedstock] = dfint[feedstock] * molar_mass_dict[feedstock] * conv_factor
+
+    # pull out just the columns we want
+    tdfint = dfint.loc[:, ['time', 'int_dust_ton_ha_yr', 'adv', feedstock]]
+    # compute dissolution
+    tdfint['dust_minus_adv'] = tdfint['int_dust_ton_ha_yr'] - tdfint['adv'] # dust that's left after solid advection
+    tdfint['total_dissolution'] = tdfint[feedstock]  # net dissolution
+    tdfint['fraction_sld_advected'] = tdfint['adv'] / tdfint['int_dust_ton_ha_yr']
+    tdfint['fraction_sld_remaining'] = (tdfint['int_dust_ton_ha_yr'] - tdfint['adv'] - tdfint[feedstock]) / tdfint['int_dust_ton_ha_yr']
+    # fraction of non-advected rock that gets dissolved
+    tdfint['fraction_remaining_dissolved'] = tdfint['total_dissolution'] / tdfint['dust_minus_adv']
+    # fraction of total applied rock that gets dissolved
+    tdfint['fraction_total_dissolved'] = tdfint['total_dissolution'] / tdfint['int_dust_ton_ha_yr']
+
+    # --- return result
+    return tdfint
+    
+    
+def carbAlk_adv(
+    outdir: str,
+    runname: str,
+    var_fn: str = "flx_co2sp",
+    cdvar: str = "ALK",
+    convert_units: bool = True,
+    co2potential_g_mol_sil: float=88.02, # potential grams of CO2 per mole of alkalinity assuming 2:1 DIC_CO2:ALK
+    co2potential_g_mol_cc: float=44.01, # potential grams of CO2 per mole of alkalinity assuming 1:1 DIC_CO2:ALK
+    ) -> pd.DataFrame:
+    """
+    Get the advective and storage fluxes of carbonate alkalinity over time. 
+    Uses the *flx_co2sp* files and ALK variable by default. files are in 
+    mol/m2/yr and output is same or in [mass]/m2/yr for co2potential.
+    
+    carbonate alkalinity is defined as: ALK = [HCO3]- + 2[CO3]--. 
+    
+    int_* fluxes are multiplied by time so the output is the time-integral.
+
+    Note, diffusive flux is generally negligibly small and source / sink 
+    contributions are not computed in the input files so they're ignored here. 
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    var_fn : str
+        base name of the SCEPTER flx file. Format is "[basename]-[cdvar].txt"
+    cdvar : str
+        name of the variable used for CDR metric. Should align with var_fn such that format is "[basename]-[cdvar].txt"
+    convert_units : bool
+        if True, convert mol/m2/yr to ton/ha/yr, if False convert mol/m2/yr to g/m2/yr
+    co2potential_g_mol_sil : float
+        molar mass of CO2 [g mol-1] used to convert mol/m2/yr to g/m2/yr. This refers to the potential 
+        CO2 sequestration by a silicate feedstock where you could get two moles of DIC from CO2 per mole 
+        alkalinity
+    co2potential_g_mol_cc : float
+        molar mass of CO2 [g mol-1] used to convert mol/m2/yr to g/m2/yr. This refers to the potential 
+        CO2 sequestration by a carbonate feedstock where you could get one mole of DIC from CO2 per mole 
+        alkalinity
+
+    Returns
+    -------
+    pd.DataFrame
+        timeseries of the carbonate alkalinity flux metrics
+    """
+    # *****************************************
+    # define unit conversion constants
+    # for g/m2/yr to ton/ha/yr
+    ton_g = 1 / 1e6   # [ton g-1]
+    m2_ha = 10e3      # [m2 ha-1]
+    conv_factor = ton_g * m2_ha 
+    # *****************************************
+
+    # get the txt file as a pandas dataframe 
+    df, dfint = get_data(outdir, runname, var_fn, cdvar)
+
+    # pull out the relevant components
+    tdf = df.loc[:, ['time', 'tflx', 'adv']].rename(columns={'time': 'time', 
+                                                        'tflx': 'calkflx_tflx',
+                                                        'adv': 'calkflx_adv'})
+    tdfint = dfint.loc[:, ['time', 'tflx', 'adv']].rename(columns={'time': 'time', 
+                                                        'tflx': 'calkflx_tflx',
+                                                        'adv': 'calkflx_adv'})
+    # get total carbonate alkalinity flux (add the storage back in)
+    tdf['calkflx_tot'] = tdf['calkflx_adv'] + tdf['calkflx_tflx']
+    tdfint['calkflx_tot'] = tdfint['calkflx_adv'] + tdfint['calkflx_tflx']
+
+    # compute potential flux
+    if convert_units:  # get co2 potentials in ton/ha/yr
+        # advective flx
+        tdf['co2pot_adv_tonHaYr_sil'] = tdf['calkflx_adv'] * co2potential_g_mol_sil * conv_factor
+        tdf['co2pot_adv_tonHaYr_cc'] = tdf['calkflx_adv'] * co2potential_g_mol_cc * conv_factor
+        tdfint['co2pot_adv_tonHa_sil'] = tdfint['calkflx_adv'] * co2potential_g_mol_sil * conv_factor
+        tdfint['co2pot_adv_tonHa_cc'] = tdfint['calkflx_adv'] * co2potential_g_mol_cc * conv_factor
+        # advective + storage flx
+        tdf['co2pot_tot_tonHaYr_sil'] = tdf['calkflx_tot'] * co2potential_g_mol_sil * conv_factor
+        tdf['co2pot_tot_tonHaYr_cc'] = tdf['calkflx_tot'] * co2potential_g_mol_cc * conv_factor
+        tdfint['co2pot_tot_tonHa_sil'] = tdfint['calkflx_tot'] * co2potential_g_mol_sil * conv_factor
+        tdfint['co2pot_tot_tonHa_cc'] = tdfint['calkflx_tot'] * co2potential_g_mol_cc * conv_factor
+
+    else: # get co2 potentials in g/m2/yr
+        # advective flx
+        tdf['co2pot_adv_gm2Yr_sil'] = tdf['calkflx_adv'] * co2potential_g_mol_sil 
+        tdf['co2pot_adv_gm2Yr_cc'] = tdf['calkflx_adv'] * co2potential_g_mol_cc 
+        tdfint['co2pot_adv_gm2_sil'] = tdfint['calkflx_adv'] * co2potential_g_mol_sil 
+        tdfint['co2pot_adv_gm2_cc'] = tdfint['calkflx_adv'] * co2potential_g_mol_cc 
+        # advective + storage flx
+        tdf['co2pot_tot_gm2Yr_sil'] = tdf['calkflx_tot'] * co2potential_g_mol_sil 
+        tdf['co2pot_tot_gm2Yr_cc'] = tdf['calkflx_tot'] * co2potential_g_mol_cc 
+        tdfint['co2pot_tot_gm2_sil'] = tdfint['calkflx_tot'] * co2potential_g_mol_sil 
+        tdfint['co2pot_tot_gm2_cc'] = tdfint['calkflx_tot'] * co2potential_g_mol_cc 
+
+    # multiply tdfint columns by time (required to output time-integrated fluxes)
+    tdfint = tdfint.apply(lambda x: x * tdfint['time'] if x.name not in ['time'] else x)
+
+    
+    # --- tidy up output
+    tdf['units'] = 'mol m-2 yr'
+    tdfint['units'] = 'mol m-2'
+    tdfint['flx_type'] = "int_flx"
+    tdf['flx_type'] = "flx"
+    tdfint['runname'] = tdf['runname'] = runname
+    tdfint['var'] = tdf['var'] = cdvar
+    
+    # combine 
+    outdf = pd.concat([tdf, tdfint], axis=0, ignore_index=True)
+    return outdf
+
+
+def sumCat_adv(
+    outdir: str,
+    runname: str,
+    var_fn: str = "flx_aq",
+    catvars_charge: dict = {"ca": 2, "mg": 2, "k": 1, "na": 1},
+    convert_units: bool = True
+    ) -> pd.DataFrame:
+    """
+    Get the advective and storage fluxes of the sum of cations over time. 
+    Uses the *flx_aq* files and cation variable by default. files are in 
+    mol/m2/yr and output is same or in [mass]/m2/yr for co2potential. 
+    
+    int_* fluxes are multiplied by time so the output is the time-integral.
+
+    Note, diffusive flux is generally negligibly small and source / sink 
+    contributions are not computed in the input files so they're ignored here. 
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    var_fn : str
+        base name of the SCEPTER flx file. Format is "[basename]-[cdvar].txt"
+    catvars_charge : dict
+        dict of variable used for cation metric. key is the variable, value is the charge. 
+        Should align with var_fn such that format is "[basename]-[cdvar].txt"
+    convert_units : bool
+        if True, convert mol/m2/yr to ton/ha/yr, if False convert mol/m2/yr to g/m2/yr
+
+    Returns
+    -------
+    dict
+        dict of pd.DataFrames, one for each species
+    pd.DataFrame
+        summary timeseries cation fluxes
+    """
+    # *****************************************
+    # define unit conversion constants
+    # for g/m2/yr to ton/ha/yr
+    ton_g = 1 / 1e6   # [ton g-1]
+    m2_ha = 10e3      # [m2 ha-1]
+    conv_factor = ton_g * m2_ha 
+    # *****************************************
+
+    # find which cations are tracked
+    catvars_exist = find_cations(outdir, runname, var_fn=var_fn, catvars_charge=catvars_charge)
+    
+    # loop through cation files and compile
+    out_dict = {}
+    for cat in catvars_exist:
+        # get cation charge
+        ccharge = catvars_charge[cat]
+        # get the txt file as a pandas dataframe 
+        tmpdf, tmpdfint = get_data(outdir, runname, var_fn, cat)
+        # get the summary file for this cation that we'll save 
+        outdf_cat, nonintdf, intdf = build_cation_df(runname, cat, ccharge, tmpdf, tmpdfint)
+        # add to the output dictionary
+        out_dict[cat] = outdf_cat
+
+
+        # get the columns we want and append with charge because we'll multiply them by their charge shortly
+        tdf = nonintdf.loc[:, ['time', 'tflx', 'adv', 'carbsld_source', 'noncarbsld_source']].rename(
+            columns={'time': 'time', 'tflx': 'tflx_charge', 'adv': 'adv_charge',
+                    'carbsld_source': 'carbsld_source_charge', 'noncarbsld_source': 'noncarbsld_source_charge'}
+        )
+        tdfint = intdf.loc[:, ['time', 'tflx', 'adv', 'carbsld_source', 'noncarbsld_source']].rename(
+            columns={'time': 'time', 'tflx': 'tflx_charge', 'adv': 'adv_charge',
+                    'carbsld_source': 'carbsld_source_charge', 'noncarbsld_source': 'noncarbsld_source_charge'}
+        )
+
+        # get their charge flux
+        tdf = tdf.apply(lambda x: x * ccharge if x.name not in ['time'] else x)
+        tdfint = tdfint.apply(lambda x: x * ccharge if x.name not in ['time'] else x)
+
+        # --- compute charge adjusted cation flux
+        # advective flx (sum of -tflx -noncarbsld_source -carbsld_source/2)
+        # compute potential flux
+        if convert_units:  # get co2 potentials in ton/ha/yr
+            # advective flx (sum of -tflx -noncarbsld_source -carbsld_source/2)
+            tdf['co2pot_adv_tonHaYr'] = nonintdf['co2pot_adv_tonHaYr']
+            tdfint['co2pot_adv_tonHa'] = intdf['co2pot_adv_tonHa']
+            # advective plus storage flx (sum of -noncarbsld_source -carbsld_source/2)
+            tdf['co2pot_tot_tonHaYr'] = nonintdf['co2pot_tot_tonHaYr']
+            tdfint['co2pot_tot_tonHa'] = intdf['co2pot_tot_tonHa']
+
+        else: # get co2 potentials in g/m2/yr
+            # advective flx (sum of -tflx -noncarbsld_source -carbsld_source/2)
+            tdf['co2pot_adv_gm2Yr'] = nonintdf['co2pot_adv_gm2Yr']
+            tdfint['co2pot_adv_gm2'] = intdf['co2pot_adv_gm2']
+            # advective plus storage flx (sum of -noncarbsld_source -carbsld_source/2)
+            tdf['co2pot_tot_gm2Yr'] = nonintdf['co2pot_tot_gm2Yr']
+            tdfint['co2pot_tot_gm2'] = intdf['co2pot_tot_gm2']
+
+        # if it's the first loop, create the output dataframe
+        if cat == catvars_exist[0]:
+            outdf = tdf.copy()
+            outdfint = tdfint.copy()
+        else:
+            # add all columns except time (effectively adding the new cation into the mix)
+            outdf = outdf.drop(columns='time').copy() + tdf.drop(columns='time').copy()
+            outdfint = outdfint.drop(columns='time').copy() + tdfint.drop(columns='time').copy()
+            # add time back in
+            outdf.insert(0, 'time', tdf['time'])
+            outdfint.insert(0, 'time', tdfint['time'])
+            
+    # multiply tdfint columns by time (required to output time-integrated fluxes)
+    outdfint = outdfint.apply(lambda x: x * outdfint['time'] if x.name not in ['time'] else x)
+
+    # --- tidy up output
+    outdf['units'] = 'mol m-2 yr x charge'
+    outdfint['units'] = 'mol m-2 x charge'
+    outdfint['flx_type'] = "int_flx"
+    outdf['flx_type'] = "flx"
+    outdfint['runname'] = outdf['runname'] = runname
+    outdfint['var'] = outdf['var'] = '+'.join(catvars_exist)
+    
+    # combine 
+    outdfx = pd.concat([outdf, outdfint], axis=0, ignore_index=True)
+    return outdfx, out_dict
+        
+
+def find_cations(
+    outdir: str,
+    runname: str,
+    var_fn: str = "flx_aq",
+    catvars_charge: dict = {"ca": 2, "mg": 2, "k": 1, "na": 1}
+    ) -> list:
+    """
+    Get a list of cation files present in the flux output.
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    var_fn : str
+        base name of the SCEPTER flx file. Format is "[basename]-[cdvar].txt"
+    catvars_charge : dict
+        dict of variable used for cation metric. key is the variable, value is the charge. 
+        Should align with var_fn such that format is "[basename]-[cdvar].txt"
+    
+    Returns
+    -------
+    list 
+        list of catvars that are presenet in the output. 
+    """
+    # create empty list to hold existing catvars
+    outlist = []
+    for key, val in catvars_charge.items():
+        # build the path / fn
+        fn = f"{var_fn}-{key}.txt"   # flux timeseries
+        file_path = os.path.join(outdir, runname, "flx", fn)
+        if os.path.exists(file_path):
+            outlist.append(key)
+    # return result
+    return outlist
+
+
+def build_cation_df(
+    runname: str, 
+    cation: str,
+    ccharge: float,
+    tmpdf: pd.DataFrame, 
+    tmpdfint: pd.DataFrame,
+    inorganic_sp_list: list = ['cc', 'dlm', 'arg'],
+    negligible_val: float = 1e-7,
+    co2potential_g_mol: float=44.01,
+    convert_units: bool=True
+    )-> pd.DataFrame:
+    """
+    read in the two cation dataframes, clean them up, return them
+
+    Parameters
+    ----------
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    cation : str
+        name of the cation in the cation list
+    ccharge : float
+        value of cation charge
+    tmpdf : pd.DataFrame
+        dataframe generated in the for loop of sumCat_adv function
+    tmpdfint : pd.DataFrame
+        time integrated dataframe generated in the for loop of sumCat_adv function
+    inorganic_sp_list : list
+        list of all the inorganic species that we deduct half c potential removal from
+    negligible_val : float
+        if any column has all its values below this value, we remove it from the df
+    co2potential_g_mol : float
+        molar mass of co2
+    convert_units : bool
+        if True, convert mol/m2/yr to ton/ha/yr, if False convert mol/m2/yr to g/m2/yr
+
+    Returns
+    -------
+    pd.DataFrame
+        one for tmpdf, one for tmpdfint
+    """
+    # *****************************************
+    # define unit conversion constants
+    # for g/m2/yr to ton/ha/yr
+    ton_g = 1 / 1e6   # [ton g-1]
+    m2_ha = 10e3      # [m2 ha-1]
+    conv_factor = ton_g * m2_ha 
+    # *****************************************
+
+    # list of columns we cannot trim
+    columns_to_keep = ["time", "adv", "tflx", "res"]
+    # remove empty or negligible columns (to simplify) (but keep "time" and "res" no matter what)
+    tmpdf = tmpdf.loc[:, (np.abs(tmpdf) > 1e-7).any(axis=0) | tmpdf.columns.isin(columns_to_keep)].copy()
+    tmpdfint = tmpdfint.loc[:, (np.abs(tmpdfint) > 1e-7).any(axis=0) | tmpdfint.columns.isin(columns_to_keep)].copy()
+    
+    # find out how many carbon-bearing minerals are present
+    inorg_sp = list(set(inorganic_sp_list) & set(tmpdf.columns))
+    inorg_sp_int = list(set(inorganic_sp_list) & set(tmpdfint.columns))
+    # get other sources
+    exclude_cols = inorg_sp + ['time', 'tflx', 'adv', 'res']
+    exclude_cols_int = inorg_sp_int + ['time', 'tflx', 'adv', 'res']
+    noninorg_sources = tmpdf.columns.difference(exclude_cols, sort=False)
+    noninorg_sources_int = tmpdfint.columns.difference(exclude_cols, sort=False)
+
+    # add columns for individual sources times the charge
+    # -- noninorganic sources
+    if len(noninorg_sources) > 0:
+        tmpdf.loc[:, 'noncarbsld_source'] = tmpdf[noninorg_sources].sum(axis=1) 
+    else:
+        tmpdf['noncarbsld_source'] = 0.
+    if len(noninorg_sources_int) > 0:
+        tmpdfint.loc[:, 'noncarbsld_source'] = tmpdfint[noninorg_sources_int].sum(axis=1) 
+    else:
+        tmpdfint['noncarbsld_source'] = 0.
+    
+    # -- inorganic sources
+    if len(inorg_sp) > 0:
+        tmpdf.loc[:, 'carbsld_source'] = tmpdf[inorg_sp].sum(axis=1)
+    else:
+        tmpdf['carbsld_source'] = 0.
+    if len(inorg_sp_int) > 0:
+        tmpdfint.loc[:, 'carbsld_source'] = tmpdfint[inorg_sp_int].sum(axis=1)
+    else:
+        tmpdfint['carbsld_source'] = 0.
+    # advective flx (sum of -tflx -noncarbsld_source -carbsld_source/2)
+    adv_cat = -tmpdf['tflx'] - tmpdf['noncarbsld_source'] - tmpdf['carbsld_source']/2
+    adv_cat_int = -tmpdfint['tflx'] - tmpdfint['noncarbsld_source'] - tmpdfint['carbsld_source']/2
+    # advective plus storage flx (sum of -noncarbsld_source -carbsld_source/2)
+    tot_cat = -tmpdf['noncarbsld_source'] - tmpdf['carbsld_source']/2
+    tot_cat_int = -tmpdfint['noncarbsld_source'] - tmpdfint['carbsld_source']/2
+
+    # compute potential flux
+    if convert_units:  # get co2 potentials in ton/ha/yr
+        # advective flx (sum of -tflx -noncarbsld_source -carbsld_source/2)
+        tmpdf['co2pot_adv_tonHaYr'] = adv_cat * ccharge * co2potential_g_mol * conv_factor
+        tmpdfint['co2pot_adv_tonHa'] = adv_cat_int * ccharge * co2potential_g_mol * conv_factor
+        # advective plus storage flx (sum of -noncarbsld_source -carbsld_source/2)
+        tmpdf['co2pot_tot_tonHaYr'] = tot_cat * ccharge * co2potential_g_mol * conv_factor
+        tmpdfint['co2pot_tot_tonHa'] = tot_cat_int * ccharge * co2potential_g_mol * conv_factor
+
+    else: # get co2 potentials in g/m2/yr
+        # advective flx (sum of -tflx -noncarbsld_source -carbsld_source/2)
+        tmpdf['co2pot_adv_gm2Yr'] = adv_cat * ccharge * co2potential_g_mol 
+        tmpdfint['co2pot_adv_gm2'] = adv_cat_int * ccharge * co2potential_g_mol 
+        # advective plus storage flx (sum of -noncarbsld_source -carbsld_source/2)
+        tmpdf['co2pot_tot_gm2Yr'] = tot_cat * ccharge * co2potential_g_mol 
+        tmpdfint['co2pot_tot_gm2'] = tot_cat_int * ccharge * co2potential_g_mol 
+
+    # add qualitative columns
+    tmpdf['units'] = 'mol m-2 yr'
+    tmpdfint['units'] = 'mol m-2'
+    tmpdfint['flx_type'] = "int_flx"
+    tmpdf['flx_type'] = "flx"
+    tmpdfint['runname'] = tmpdf['runname'] = runname
+    tmpdfint['cation'] = tmpdf['cation'] = cation
+    tmpdfint['charge'] = tmpdf['charge'] = ccharge
+
+    # --- return
+    # combine 
+    outdf = pd.concat([tmpdf, tmpdfint], axis=0, ignore_index=True)
+    return outdf, tmpdf, tmpdfint
+
+
+
+
+# %% 
+# ******************************************************************************
+# -------------------------- CDR METRIC MAIN -----------------------------------
+def cflx_calc(
+    outdir: str,
+    runname: str,
+    feedstock: str,
+    dust_from_file: bool = True,
+    convert_units: bool = True,
+    save_dir: str = 'postproc_flxs',
+    calc_list: list = ['co2_flx', 'carbAlk_adv', 'sumCat_adv', 'sld_flx']
+    ):
+    """
+    compute various cdr-relevant c flux metrics in the postprocessing
+    of a SCEPTER run. Save the results
+
+    Parameters
+    ----------
+    outdir : str
+        path to the SCEPTER output directory. Usually something like 'my/path/SCEPTER/scepter_output'
+    runname : str
+        name of the SCEPTER run (equivalent to the directory within outdir). Generally <RUNNAME>_field.
+    feedstock : str
+        ID of the feedstock used (for rock dissolution calculation)
+    dust_from_file : bool
+        if True, then compute integrated dust fluxes from the dust.txt file. Should be 
+        set to True for re-application runs since the int_sld* files are patched 
+        together from multiple 1-year runs, so they don't reflect the true time integral
+        (just the timeSTEP integral). This ensures that the integral is correct
+    convert_units : bool
+        if True, convert mol/m2/yr to ton/ha/yr, if False convert mol/m2/yr to g/m2/yr
+    save_dir : str
+        the name of the subdirectory in the run dir where the cflux files get saved
+    calc_list : list
+        list of all the functions we're going to run
+
+    Results
+    -------
+    [ no output - fxn saves the results ]
+    """
+    # where to save the results
+    savehere = os.path.join(outdir, runname, save_dir)
+    # make dir if it doesn't exist
+    if not os.path.exists(savehere):
+        os.makedirs(savehere)
+
+
+    # [1] --- CO2 FLUXES ------------------------------------------------
+    if "co2_flx" in calc_list:
+        co2df = co2_flx(outdir, runname, var_fn = "flx_gas", cdvar= "pco2", 
+                        organic_sp_list = ["g1", "g2", "g3"],
+                        inorganic_sp_list = ["arg", "cc", "dlm"],
+                        convert_units = convert_units,
+                        co2_g_mol = 44.01
+                        ) 
+        # save
+        savename = "co2_flxs.pkl"
+        co2df.to_pickle(os.path.join(savehere, savename))
+
+    
+    # [2] --- CARBONATE ALKALINITY FLUXES ---------------------------------
+    if "carbAlk_adv" in calc_list:
+        cAlkdf = carbAlk_adv(outdir, runname, var_fn = "flx_co2sp", cdvar = "ALK",
+                            convert_units = convert_units,
+                            co2potential_g_mol_sil = 88.02, 
+                            co2potential_g_mol_cc = 44.01
+                            )
+        # save
+        savename = "carbAlk_flxs.pkl"
+        cAlkdf.to_pickle(os.path.join(savehere, savename))
+
+
+    # [3] --- SUM OF CATION FLUXES -----------------------------------------
+    if "sumCat_adv" in calc_list: 
+        sumCatdf, sumCatdict = sumCat_adv(outdir, runname, var_fn = "flx_aq",
+                                          catvars_charge = {"ca": 2, "mg": 2, "k": 1, "na": 1},
+                                          convert_units = True)
+        # save the main file
+        savename_main = "carbAlk_flxs.pkl"
+        sumCatdf.to_pickle(os.path.join(savehere, savename_main))
+        
+        # save the output dictionary
+        for name, df in sumCatdict.items():
+            # create file name using the dictionary key
+            savename = f'cationflx_{name}.pkl'
+            # save
+            df.to_pickle(os.path.join(savehere, savename))
+    
+
+    # [4] --- ROCK DISSOLUTION ----------------------------------------------
+    if "sld_flx" in calc_list: 
+        # feedstock must be in list to loop through
+        if not isinstance(feedstock, list):
+            feedstock = [feedstock]
+        for fs in feedstock:
+            rockdf = sld_flx(outdir, runname, fs, var_fn = "flx_sld",
+                            dust_from_file = True,
+                            molar_mass_dict = molar_mass_dict)
+            # save
+            savename = f"rockflx_{fs}.pkl"
+            rockdf.to_pickle(os.path.join(savehere, savename))
+
+
+
+
+
+# ------- SCRATCH ----------------------------------------------------- # 
+# %%
+# outdir = "/home/tykukla/SCEPTER/scepter_output"
+# # runname = "noFert_cc_multiyear_site_311b_app_10000p0_psize_75_composite_field"
+# runname = "midfert_cc_311a_0p0_cc_field_tau15p0"
+# feedstock = ['cc', 'amnt']
+
+# cflx_calc(outdir, runname, feedstock)
+
+
+
+
+
+
+
+# %% 
+
+# outdir = "/home/tykukla/SCEPTER/scepter_output"
+# runname = "noFert_cc_multiyear_site_311b_app_10000p0_psize_75_composite_field"
+# # runname = "lowfert_cc_311b_30p0_cc_field_tau15p0"
+# var_fn = 'flx_co2sp'
+# cdvar = 'ALK'
+
+# df, dfint = get_data(outdir, runname, var_fn, cdvar)
+
+# df = co2_flx(outdir, runname)
+# df
+# %% 
+# import matplotlib.pyplot as plt
+
+# plt.plot(dfint['time'], dfint['adv']*dfint['time'], label='adv')
+# plt.plot(dfint['time'], dfint['tflx']*dfint['time'], label='tflx')
+# plt.legend()
+# %% 
+# plt.plot(df['time'], df['adv'], label='adv')
+# plt.plot(df['time'], df['tflx'], label='tflx')
+# plt.plot(df['time'], df['tflx'] + df['adv'], label='tflx+adv')
+# plt.legend()
+
+# %% 
+
+# outdir = "/home/tykukla/SCEPTER/scepter_output"
+# runname = "noFert_cc_multiyear_site_311b_app_10000p0_psize_75_composite_field"
+# var_fn = "flx_sld"
+# cdvar = "cc"
+# organic_list = ["g1", "g2", "g3"]
+
+
+# # get the txt file as a pandas dataframe
+# fn = f"{var_fn}-{cdvar}.txt"   # flux timeseries
+# fn_int = f"int_{fn}"       # integrated flux timeseries
+# df = preprocess_txt(outdir, runname, fn, run_subdir = "flx")
+# dfint = preprocess_txt(outdir, runname, fn_int, run_subdir = "flx")
+
+# # %% 
+# plt.plot(dfint['time'], dfint['tflx']*dfint['time'])
+# plt.plot(dfint['time'], dfint['rain']*dfint['time'])
+
+# # %% 
+# plt.plot(dfint['time'], dfint['time']*(-1*(dfint['rain']) - dfint['tflx']))
+# plt.plot(dfint['time'], dfint['cc']*dfint['time'])
+
+# # %% 
+# plt.plot(dfint['time'], dfint['time']*(-1*(dfint['rain']) - dfint['cc']))
+# plt.plot(dfint['time'], (dfint['tflx']+dfint['adv'])*dfint['time'])
+
+# # %% 
+# plt.plot(dfint['time'], dfint['cc']*dfint['time'], label='diss')
+# plt.plot(dfint['time'], -1*dfint['rain']*dfint['time'], label='rain')
+# plt.plot(dfint['time'], dfint['tflx']*dfint['time'], label='tflx')
+# plt.plot(dfint['time'], dfint['adv']*dfint['time'], label='adv')
+# plt.legend()
+
+# # %% 
+# plt.plot(dfint['time'], dfint['time']*(-1*dfint['rain']-dfint['adv']), label='rain-adv')
+# plt.plot(dfint['time'], dfint['cc']*dfint['time'], label='diss')
+# plt.plot(dfint['time'], dfint['tflx']*dfint['time'], label='tflx')
+# plt.legend()
+
+# # %% 
+# plt.plot(df['time'], df['cc'], label='diss')
+# plt.plot(df['time'], -1*df['rain'], label='rain')
+# plt.plot(df['time'], df['tflx'], label='tflx')
+# plt.plot(df['time'], df['adv'], label='adv')
+# plt.ylim(-10, 10)
+# plt.legend()
+
+
+
+# # %%
+# plt.plot(dfint['time'], dfint['cc']*dfint['time'])
+# plt.plot(dfint['time'], -1*dfint['rain']*dfint['time'])
+
+# # %%
+# outdir = "/home/tykukla/SCEPTER/scepter_output"
+# runname = "noFert_cc_multiyear_site_311b_app_10000p0_psize_75_composite_field"
+# fn = "flx_aq-ca.txt"
+# df = preprocess_txt(outdir, runname, fn)
+# # %%
+# df
+# # %%
+# import matplotlib.pyplot as plt
+# plt.plot(df['time'], df['adv'])
+# plt.plot(df['time'], df['tflx'])
+# # %%
+
+
+
+# %% 
+
+# # .. inputs minus outputs
+# inputs_x = (dfint['cc'] + dfint['g2'])*dfint['time']
+# outputs_x = (dfint['dif'] + dfint['adv'])*dfint['time']
+
+
+## -- CONFIRMED
+## 
+## cc + g2 - (dif + adv) = tflx
+## --
+
+
+# plt.plot(dfint['time'],  -1*dfint['cc']*dfint['time'], label='cc_in')
+# plt.plot(dfint['time'],  -1*dfint['g2']*dfint['time'], label='g2_in')
+# plt.plot(dfint['time'],  -1*dfint['dif']*dfint['time'], label='dif_out')
+# plt.plot(dfint['time'],  -1*dfint['adv']*dfint['time'], label='adv_out')
+# plt.legend()
+# # %%
+
+
+# plt.plot(dfint['time'], -1*dfint['adv']*dfint['time'], label='adv_out')
+# plt.plot(dfint['time'], -1*dfint['cc']*dfint['time'], label='cc_in')
+# plt.plot(dfint['time'], -1*dfint['g2']*dfint['time'], label='g2_in')
+# plt.plot(dfint['time'], -1*dfint['tflx']*dfint['time'], label='tflx_')
+# plt.plot(dfint['time'], -1*(dfint['adv'] + dfint['cc'])*dfint['time'], label='adv - cc')
+# plt.plot(dfint['time'], -1*(dfint['adv'] + (dfint['cc']-dfint['tflx']))*dfint['time'], label='adv - (cc-tflx)')
+# plt.legend()
+# # %%
+# plt.plot(dfint['time'], dfint['tflx']*dfint['time'], label='tflx_')
+# plt.plot(dfint['time'], dfint['adv']*dfint['time'], label='adv_out')
+# # plt.plot(dfint['time'], dfint['dif']*dfint['time'], label='dif_out')
+# plt.plot(dfint['time'], dfint['cc']*dfint['time'], label='cc_in')
+# plt.legend()
+
+
+# # %% 
+# plt.plot(dfint['time'], (dfint['cc']*dfint['time'] / ((dfint['g2'] + dfint['cc'])*dfint['time'])))
+# # %%
+
+# plt.plot(dfint['time'], (dfint['cc'])*dfint['time'], label='cc')
+# plt.plot(dfint['time'], (dfint['g2'])*dfint['time'], label='g2')
+# plt.legend()
+# # %%
+# plt.plot(dfint['time'], (dfint['cc'])*dfint['time'] / ((dfint['cc'])*dfint['time'] + (dfint['g2'])*dfint['time']))
+
+
+# # %%
+# plt.plot(df['time'], df['adv'] - df['adv'][0], label='adv_out')
+# plt.plot(df['time'], df['cc'] - df['cc'][0], label='cc_in')
+# plt.plot(df['time'], df['g2'] - df['g2'][0], label='g2_in')
+# plt.plot(df['time'], df['tflx'] - df['tflx'][0], label='tflx_')
+# # plt.plot(df['time'], (df['adv'] + dfint['cc']), label='adv - cc')
+# # plt.plot(df['time'], (df['adv'] + (dfint['cc']-dfint['tflx'])), label='adv - (cc-tflx)')
+# plt.ylim(-4,4)
+# plt.legend()
+
+# # %%
+# plt.plot(df['time'], df['adv'] - df['adv'][0], label='adv_out')
+# plt.plot(df['time'], df['cc'] , label='cc_in')
+# # plt.plot(df['time'], df['g2'] , label='g2_in')
+# plt.plot(df['time'], df['g2'] - df['g2'][0], label='g2_in')
+# plt.plot(df['time'], df['tflx'] , label='tflx_')
+# # plt.plot(df['time'], (df['adv'] + dfint['cc']), label='adv - cc')
+# # plt.plot(df['time'], (df['adv'] + (dfint['cc']-dfint['tflx'])), label='adv - (cc-tflx)')
+# plt.ylim(-4,4)
+# plt.legend()
+# # %%
+
+# plt.plot(df['time'], df['adv'] - df['adv'][0], label='adv_out')
+# plt.plot(df['time'], -1*df['cc'] , label='cc_in')
+# # plt.plot(df['time'], df['g2'] , label='g2_in')
+# plt.plot(df['time'], -1*df['g2'] - -1*df['g2'][0], label='g2_in')
+# plt.plot(df['time'], -1*df['dif'] - -24.2, label='dif_in')
+# # plt.plot(df['time'], df['tflx'] , label='tflx_')
+# # plt.plot(df['time'], (df['adv'] + dfint['cc']), label='adv - cc')
+# # plt.plot(df['time'], (df['adv'] + (dfint['cc']-dfint['tflx'])), label='adv - (cc-tflx)')
+# plt.ylim(-4,4)
+# plt.legend()
+
+# # %% 
+# plt.plot(dfint['time'], -1*dfint['dif']*dfint['time'] - dfint['g2']*dfint['time'], label='-dif-resp')
+# plt.plot(dfint['time'], dfint['adv']*dfint['time'], label='adv')
+# plt.plot(dfint['time'], -1*dfint['cc']*dfint['time'], label='cc')
+# plt.plot(dfint['time'], -1*dfint['dif']*dfint['time'] - dfint['g2']*dfint['time'] - dfint['cc']*dfint['time'], label='-dif-resp+cc')
+# plt.plot(dfint['time'], -1*dfint['dif']*dfint['time'] - dfint['g2']*dfint['time'] - dfint['cc']*dfint['time'] - dfint['tflx']*dfint['time'], label='-dif-resp+cc+tflx',linestyle='dashed')
+# #plt.plot(dfint['time'], -1*dfint['dif']*dfint['time'], label='-dif')
+# #plt.plot(dfint['time'], dfint['g2']*dfint['time'], label='resp')
+# plt.legend()
+
+# # --- CONFIRMED -- 
+# # adv = (-dif-resp) - cc - tflx
+# # where:
+# #    -dif-resp = respired carbon that's stored (it's positive when resp > diffusion, indicating respired carbon staying in the system)
+# #    cc = c flux due to cc (cc is negative directionally, so subtracting it is the same as adding its absolute value (e.g., cc added to the system))
+# #    tflx = change in storage (tflx is negative directionally, so subtracting it is the same as adding its absolute value (e.g., added storage to the system))
+
+
+# # %%
+
+# # plt.plot(df['time'], -1*df['dif'] - df['g2'], label='-dif-resp')
+# plt.plot(df['time'], df['adv'], label='adv')
+# # plt.plot(df['time'], -1*df['cc'], label='cc')
+# # plt.plot(df['time'], -1*df['dif'] - df['g2'] - df['cc'], label='-dif-resp+cc')
+# plt.plot(df['time'], -1*df['dif'] - df['g2'] - df['cc'] - df['tflx'], label='-dif-resp-cc-tflx',linestyle='dashed')
+# #plt.plot(dfint['time'], -1*dfint['dif']*dfint['time'], label='-dif')
+# #plt.plot(dfint['time'], dfint['g2']*dfint['time'], label='resp')
+# plt.legend()
+
+# # %%
+# plt.plot(df['time'], df['tflx'], label='tflx')
+# plt.plot(df['time'], -1*df['dif'] - df['g2'] - df['cc'] - df['adv'], label='-dif-resp-cc-adv',linestyle='dashed')
+# plt.legend()
+# # %%
diff --git a/scepter/setup/scepter_helperFxns.py b/scepter/setup/scepter_helperFxns.py
index b18f45d..0235eaf 100644
--- a/scepter/setup/scepter_helperFxns.py
+++ b/scepter/setup/scepter_helperFxns.py
@@ -11,6 +11,7 @@
 import re
 import shutil
 import subprocess
+from scipy.integrate import cumulative_trapezoid
 
 import pandas as pd
 
@@ -431,7 +432,7 @@ def duration_check(
 
         # compare to target
         model_duration = df["time"].values.max()
-        diff_percent = ((model_duration - target_duration) / target_duration) * 100
+        diff_percent = abs(((model_duration - target_duration) / target_duration) * 100)
         diff_result = True if diff_percent < duration_tol else False
 
         # make output list
@@ -709,6 +710,77 @@ def run_complete_check(
             pass  # pass does nothing, creating an empty file
 
 
+# modify the flx/dust.txt file to get total dust flux over time 
+def dustflx_calc(
+    outdir: str,
+    runname_field: str, 
+    fdust1: float,
+    fdust2: float,
+    dustsp: str,
+    dustsp_2nd: str=None):
+    """
+    SCEPTER's default dust file (*/flx/dust.txt) shows time and the relative amount
+    of dust that gets applied. We use the dust flux to compute the timeseries
+    of dust application and the integrated dust application. Nothing gets returned,
+    but the updated dust.txt file gets saved. 
+
+    Parameters
+    ----------
+    outdir : str
+        output directory for SCEPTER results (ex: "home/name/SCEPTER/scepter_output")
+    runname_field : str
+        name of the directory for the SCEPTER field run (for now, we're not applying 
+        any changes to the lab dust fluxes)
+    fdust1 : float
+        [g m2 yr] amount of primary dust applied each year 
+    fdust2 : float
+        [g m2 yr] amount of secondary dust applied each year 
+    dustsp : str
+        name of the primary dust species (e.g., "cc" or "gbas")
+    dustsp_2nd : str
+        name of the secondary dust species (e.g., "amnt")
+
+    Results
+    -------
+    """
+    # read in the dust flux dataframe
+    file_path = os.path.join(outdir, runname_field, 'flx', 'dust.txt')
+    df = preprocess_txt(file_path)
+
+    # make sure dust fluxes are numbers
+    if not isinstance(fdust1, (int, float)):
+        try:
+            fdust1 = float(fdust1)
+        except:
+            fdust1 = 0
+    if not isinstance(fdust2, (int, float)):
+        try:
+            fdust2 = float(fdust2)
+        except:
+            fdust2 = 0
+    
+    # add a column for the dust flux
+    df['dust1_STATIC'] = fdust1
+    df['dust2_STATIC'] = fdust2
+
+    # compute dust timeseries 
+    df['dust1_g_m2_yr'] = df['dust(relative_to_average)'] * df['dust1_STATIC']
+    df['dust2_g_m2_yr'] = df['dust(relative_to_average)'] * df['dust2_STATIC']
+
+    # compute integrated dust timeseries
+    df['int_dust1_g_m2_yr'] = cumulative_trapezoid(df['dust1_g_m2_yr'], df['time'], initial=0)
+    df['int_dust2_g_m2_yr'] = cumulative_trapezoid(df['dust2_g_m2_yr'], df['time'], initial=0)
+
+    # add dust species info
+    df['dustsp1'] = dustsp
+    df['dustsp2'] = dustsp_2nd
+    
+    # save the result
+    df.to_csv(  # default is mode='w' which will overwrite the existing file (that's fine because we've merged it with the new data)
+        file_path, index=None, sep="\t"
+        )  
+
+
 # --------------------------------------------------------------------------
 
 
@@ -766,10 +838,17 @@ def to_aws(
             cmd_run = "s5cmd mv " + src + " " + dst_aws
             result1 = subprocess.run(cmd_activate, shell=True, check=True)
             result2 = subprocess.run(cmd_run, shell=True, check=True)
+            outdir_final = dst_aws
         elif aws_save == "copy":
             cmd_run = "s5cmd cp " + src + " " + dst_aws
             result1 = subprocess.run(cmd_activate, shell=True, check=True)
             result2 = subprocess.run(cmd_run, shell=True, check=True)
+            outdir_final = dst_aws
+        else:
+            outdir_final = outdir
+    
+    # return the new directory for postproc purposes
+    return outdir_final
 
 
 def aws_copy_to_local(