Skip to content

Commit

Permalink
refactor(pooch): check if files exist in repo before downloading (#153)
Browse files Browse the repository at this point in the history
* check known location ../data for data files (present e.g. if repo was cloned)
* only download from github to pooch cachedir if data files don't already exist
  • Loading branch information
wpbonelli authored Feb 21, 2024
1 parent f06363d commit 7b61b9a
Show file tree
Hide file tree
Showing 24 changed files with 609 additions and 317 deletions.
34 changes: 24 additions & 10 deletions scripts/ex-gwf-advtidal.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
# Example name and base workspace
sim_name = "ex-gwf-advtidal"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand Down Expand Up @@ -184,8 +186,11 @@ def build_models():
ghb_spd += [[1, i, 9, "tides", 15.0, "ESTUARY-L2"] for i in range(nrow)]
ghb_spd += [[2, i, 9, "tides", 1500.0, "ESTUARY-L3"] for i in range(nrow)]
ghb_spd = {0: ghb_spd}
fname = "tides.csv"
fname = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/tides.csv",
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:425337a0bf24fa72c9e40f4e3d9f698a",
)
tsdict = get_timeseries(fname, "tides", "linear")
Expand Down Expand Up @@ -223,12 +228,15 @@ def build_models():
[0, 2, 4, -20, ""],
[0, 13, 5, -40, ""],
]
fname = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/wellrates.csv",
fname = "wellrates.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:6ca7366be279d679b14e8338a195422f",
)
tsdict = get_timeseries(
fname,
fpath,
["well_1_rate", "well_2_rate", "well_6_rate"],
3 * ["stepwise"],
)
Expand Down Expand Up @@ -256,12 +264,15 @@ def build_models():
+ 3 * [""]
)
riv_spd = list(zip(rivlay, rivrow, rivcol, rivstg, rivcnd, rivrbt, rivbnd))
fname = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/riverstage.csv",
fname = "riverstage.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:83f8b526ec6e6978b1d9dbd6fde231ef",
)
tsdict = get_timeseries(
fname,
fpath,
["river_stage_1", "river_stage_2"],
["linear", "stepwise"],
)
Expand Down Expand Up @@ -291,12 +302,15 @@ def build_models():
result["areas"][i] / delr / delc,
]
)
fname = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/recharge{ipak + 1}.csv",
fname = f"recharge{ipak + 1}.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash=f"md5:{hashes[ipak]}",
)
tsdict = get_timeseries(
fname,
fpath,
[f"rch_{ipak + 1}"],
["stepwise"],
filename=f"{sim_name}.rch{ipak + 1}.ts",
Expand Down
11 changes: 8 additions & 3 deletions scripts/ex-gwf-bcf2ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
# Example name and base workspace
sim_name = "ex-gwf-bcf2ss"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand All @@ -47,11 +49,14 @@
time_units = "days"

# Load the wetdry array for layer 1
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/wetdry01.txt",
fname = "wetdry01.txt"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:3a4b357b7d2cd5175a205f3347ab973d",
)
wetdry_layer0 = np.loadtxt(pth)
wetdry_layer0 = np.loadtxt(fpath)

# Scenario-specific parameters
parameters = {
Expand Down
11 changes: 8 additions & 3 deletions scripts/ex-gwf-bump.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
# Example name and base workspace
sim_name = "ex-gwf-bump"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand Down Expand Up @@ -81,11 +83,14 @@
shape3d = (nlay, nrow, ncol)

# Load the bottom
fpth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/bottom.txt",
fname = "bottom.txt"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:9287f9e214147d95e6ed159732079a0b",
)
botm = np.loadtxt(fpth).reshape(shape3d)
botm = np.loadtxt(fpath).reshape(shape3d)

# Create a cylinder
cylinder = botm.copy()
Expand Down
29 changes: 20 additions & 9 deletions scripts/ex-gwf-capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
# Example name and base workspace
sim_name = "ex-gwf-capture"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand All @@ -51,21 +53,30 @@
time_units = "seconds"

# Load the bottom, hydraulic conductivity, and idomain arrays
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/bottom.txt",
fname = "bottom.txt"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:201758a5b7febb0390b8b52e634be27f",
)
bottom = np.loadtxt(pth)
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/hydraulic_conductivity.txt",
bottom = np.loadtxt(fpath)
fname = "hydraulic_conductivity.txt"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:6c78564ba92e850d7d51d6e957b8a3ff",
)
k11 = np.loadtxt(pth)
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/idomain.txt",
k11 = np.loadtxt(fpath)
fname = "idomain.txt"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:435d4490adff7a35d1d4928661e45d81",
)
idomain = np.loadtxt(pth, dtype=np.int32)
idomain = np.loadtxt(fpath, dtype=np.int32)

# Model parameters
nper = 1 # Number of periods
Expand Down
20 changes: 14 additions & 6 deletions scripts/ex-gwf-csub-p01.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
# Example name and base workspace
sim_name = "ex-gwf-csub-p01"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand Down Expand Up @@ -77,11 +79,14 @@
locw201 = 11

# Load the aquifer load time series
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/train_load_193704231304.csv",
fname = "train_load_193704231304.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:32dc8e7b7e39876374af43605e264725",
)
csv_load = np.genfromtxt(pth, names=True, delimiter=",")
csv_load = np.genfromtxt(fpath, names=True, delimiter=",")

# Reformat csv data into format for MODFLOW 6 timeseries file
csub_ts = []
Expand Down Expand Up @@ -306,12 +311,15 @@ def plot_results(sim, silent=True):
sim_date = [dstart + datetime.timedelta(seconds=x) for x in sim_obs["totim"]]

# get the observed head
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/s201_gw_2sec.csv",
fname = "s201_gw_2sec.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:1098bcd3f4fc1bd3b38d3d55152a8fbb",
)
dtype = [("date", object), ("dz_m", float)]
obs_head = np.genfromtxt(pth, names=True, delimiter=",", dtype=dtype)
obs_head = np.genfromtxt(fpath, names=True, delimiter=",", dtype=dtype)
obs_date = []
for s in obs_head["date"]:
obs_date.append(
Expand Down
32 changes: 21 additions & 11 deletions scripts/ex-gwf-csub-p03.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
# Example name and base workspace
sim_name = "ex-gwf-csub-p03"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand All @@ -42,11 +44,14 @@

# +
# Load the constant time series
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/boundary_heads.csv",
# The URL must point at the file itself (not just its directory): when the
# file is not already present under data_path (or data_path is None because
# ../data/{sim_name} does not exist), pooch downloads from this URL and
# verifies the result against known_hash.
fname = "boundary_heads.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:8177e15feeeedcdd59ee15745e796e59",
)
csv_head = np.genfromtxt(pth, names=True, delimiter=",")
csv_head = np.genfromtxt(fpath, names=True, delimiter=",")

# Reformat csv data into format for MODFLOW 6 timeseries file
chd_ts = []
Expand Down Expand Up @@ -769,11 +774,13 @@ def export_tables(silent=True):


def get_obs_dataframe(file_name, hash):
fpth = pooch.retrieve(
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{file_name}",
fname=file_name,
path=data_path,
known_hash=f"md5:{hash}",
)
df = pd.read_csv(fpth, index_col=0)
df = pd.read_csv(fpath, index_col=0)
df.index = pd.to_datetime(df.index.values)
df.rename({"mean": "observed"}, inplace=True, axis=1)
return df
Expand Down Expand Up @@ -1227,17 +1234,20 @@ def plot_head_es_comparison(silent=True):


def plot_calibration(silent=True):
with styles.USGSPlot() as fs:
with styles.USGSPlot():
name = list(parameters.keys())[1]
pth = os.path.join(workspace, name, f"{name}.csub.obs.csv")
df_sim = get_sim_dataframe(pth)
fpath = os.path.join(workspace, name, f"{name}.csub.obs.csv")
df_sim = get_sim_dataframe(fpath)
df_sim.rename({"TOTAL": "simulated"}, inplace=True, axis=1)

pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/boundary_heads.csv",
fname = "boundary_heads.csv"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:8177e15feeeedcdd59ee15745e796e59",
)
df_obs_heads, col_list = process_sim_csv(pth)
df_obs_heads, col_list = process_sim_csv(fpath)

ccolors = (
"black",
Expand Down
11 changes: 8 additions & 3 deletions scripts/ex-gwf-csub-p04.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Example name and base workspace
sim_name = "ex-gwf-csub-p04"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand Down Expand Up @@ -88,11 +90,14 @@
ib_thick = [float(value) for value in ib_thick_str.split(",")]

# Load active domain and create idomain array
pth = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/idomain.txt",
fname = "idomain.txt"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:2f05a27b6f71e564c0d3616e3fd00ac8",
)
ib = np.loadtxt(pth, dtype=int)
ib = np.loadtxt(fpath, dtype=int)
idomain = np.tile(ib, (nlay, 1))

# Constant head boundary cells
Expand Down
1 change: 0 additions & 1 deletion scripts/ex-gwf-curvilinear-90.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@

# +
class DisvPropertyContainer:

"""
Dataclass that stores MODFLOW 6 DISV grid information.
Expand Down
1 change: 0 additions & 1 deletion scripts/ex-gwf-curvilinear.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@

# +
class DisvPropertyContainer:

"""
Dataclass that stores MODFLOW 6 DISV grid information.
Expand Down
11 changes: 8 additions & 3 deletions scripts/ex-gwf-disvmesh.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
# Example name and base workspace
sim_name = "ex-gwf-disvmesh"
workspace = pl.Path("../examples")
data_path = pl.Path(f"../data/{sim_name}")
data_path = data_path if data_path.is_dir() else None

# Settings from environment variables
write = get_env("WRITE", True)
Expand Down Expand Up @@ -106,11 +108,14 @@ def from_argus_export(fname):


# Load argus mesh and get disv grid properties
fname = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/ex-gwf-disvmesh/argus.exp",
fname = "argus.exp"
fpath = pooch.retrieve(
url=f"https://github.com/MODFLOW-USGS/modflow6-examples/raw/master/data/{sim_name}/{fname}",
fname=fname,
path=data_path,
known_hash="md5:072a758ca3d35831acb7e1e27e7b8524",
)
verts, iverts = from_argus_export(fname)
verts, iverts = from_argus_export(fpath)
gridprops = flopy.utils.cvfdutil.get_disv_gridprops(verts, iverts)
cell_areas = []
for i in range(gridprops["ncpl"]):
Expand Down
Loading

0 comments on commit 7b61b9a

Please sign in to comment.