From 08bc9bc97dbdaf1c8571aedbc079924c48b6df09 Mon Sep 17 00:00:00 2001 From: Bobby Jackson Date: Tue, 23 Jan 2024 09:43:40 -0600 Subject: [PATCH 1/5] ADD: HYSPLIT reader --- act/io/__init__.py | 3 +- act/io/hysplit.py | 107 ++++++++++++++++++++++++++++++++++++ examples/io/plot_hysplit.py | 23 ++++++++ tests/io/test_hysplit.py | 18 ++++++ 4 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 act/io/hysplit.py create mode 100644 examples/io/plot_hysplit.py create mode 100644 tests/io/test_hysplit.py diff --git a/act/io/__init__.py b/act/io/__init__.py index 93a6b913ef..6549c83612 100644 --- a/act/io/__init__.py +++ b/act/io/__init__.py @@ -7,7 +7,7 @@ __getattr__, __dir__, __all__ = lazy.attach( __name__, - submodules=['arm', 'text', 'icartt', 'mpl', 'neon', 'noaagml', 'noaapsl', 'pysp2'], + submodules=['arm', 'text', 'icartt', 'mpl', 'neon', 'noaagml', 'noaapsl', 'pysp2', 'hysplit'], submod_attrs={ 'arm': [ 'WriteDataset', @@ -39,5 +39,6 @@ ], 'pysp2': ['read_hk_file', 'read_sp2', 'read_sp2_dat'], 'sodar': ['read_mfas_sodar'], + 'hysplit': ['read_hysplit'] }, ) diff --git a/act/io/hysplit.py b/act/io/hysplit.py new file mode 100644 index 0000000000..e77c595d7c --- /dev/null +++ b/act/io/hysplit.py @@ -0,0 +1,107 @@ +import os +import xarray as xr +import numpy as np +import pandas as pd + +from datetime import datetime +from .text import read_csv + +def read_hysplit(filename, base_year=2000): + """ + Reads an input HYSPLIT trajectory for plotting in ACT. + + Parameters + ---------- + filename: str + The input file name. + base_year: int + The first year of the century in which the data are contained. + + Returns + ------- + ds: xarray Dataset + The ACT dataset containing the HYSPLIT trajectories + """ + + ds = xr.Dataset({}) + num_lines = 0 + with open(filename, 'r') as filebuf: + num_grids = int(filebuf.readline().split()[0]) + num_lines += 1 + grid_times = [] + grid_names = [] + forecast_hours = np.zeros(num_grids) + for i in range(num_grids): + data = filebuf.readline().split() + num_lines += 1 + grid_names.append(data[0]) + grid_times.append( + datetime(year=int(data[1]), month=int(data[2]), day=int(data[3]), hour=int(data[4]))) + forecast_hours[i] = int(data[5]) + ds["grid_forecast_hour"] = xr.DataArray(forecast_hours, dims=["num_grids"]) + ds["grid_forecast_hour"].attrs["standard_name"] = "Grid forecast hour" + ds["grid_forecast_hour"].attrs["units"] = "Hour [UTC]" + ds["grid_times"] = xr.DataArray(np.array(grid_times), dims=["num_grids"]) + data_line = filebuf.readline().split() + num_lines += 1 + ds.attrs["trajectory_direction"] = data_line[1] + ds.attrs["vertical_motion_calculation_method"] = data_line[2] + num_traj = int(data_line[0]) + traj_times = [] + start_lats = np.zeros(num_traj) + start_lons = np.zeros(num_traj) + start_alt = np.zeros(num_traj) + for i in range(num_traj): + data = filebuf.readline().split() + num_lines += 1 + traj_times.append( + datetime(year=(base_year + int(data[0])), month=int(data[1]), + day=int(data[2]), hour=int(data[3]))) + start_lats[i] = float(data[4]) + start_lons[i] = float(data[5]) + start_alt[i] = float(data[6]) + + ds["start_latitude"] = xr.DataArray(start_lats, dims=["num_trajectories"]) + ds["start_latitude"].attrs["long_name"] = "Trajectory start latitude" + ds["start_latitude"].attrs["units"] = "degree" + ds["start_longitude"] = xr.DataArray(start_lats, dims=["num_trajectories"]) + ds["start_longitude"].attrs["long_name"] = "Trajectory start longitude" + ds["start_longitude"].attrs["units"] = "degree" + ds["start_altitude"] = xr.DataArray(start_alt, dims=["num_trajectories"]) + ds["start_altitude"].attrs["long_name"] = "Trajectory start altitude" + ds["start_altitude"].attrs["units"] = "degree" + + + data = filebuf.readline().split() + num_lines += 1 + var_list = ["trajectory_number", "grid_number", "year", "month", "day", + "hour", "minute", "forecast_hour", "age", "lat", "lon", "alt"] + for variable in data[1:]: + var_list.append(variable) + input_df = pd.read_csv('/Users/rjackson/opencrums/trajectories/houstonaug300.0summer2010080100', + sep='\s+', index_col=False, + names=var_list, skiprows=12) + input_df['year'] = base_year + input_df['year'] + input_df['time'] = pd.to_datetime(input_df[["year", "month", "day", "hour", "minute"]], + format='%y%m%d%H%M') + input_df = input_df.set_index("time") + del input_df["year"] + del input_df["month"] + del input_df["day"] + del input_df["hour"] + del input_df["minute"] + ds = ds.merge(input_df.to_xarray()) + ds.attrs['_datastream'] = 'hysplit' + ds["trajectory_number"].attrs["standard_name"] = "Trajectory number" + ds["trajectory_number"].attrs["units"] = "1" + ds["grid_number"].attrs["standard_name"] = "Grid number" + ds["grid_number"].attrs["units"] = "1" + ds["age"].attrs["standard_name"] = "Grid number" + ds["age"].attrs["units"] = "1" + ds["lat"].attrs["standard_name"] = "Latitude" + ds["lat"].attrs["units"] = "degree" + ds["lon"].attrs["standard_name"] = "Longitude" + ds["lon"].attrs["units"] = "degree" + ds["alt"].attrs["standard_name"] = "Altitude" + ds["alt"].attrs["units"] = "meter" + return ds diff --git a/examples/io/plot_hysplit.py b/examples/io/plot_hysplit.py new file mode 100644 index 0000000000..98ea72e4ef --- /dev/null +++ b/examples/io/plot_hysplit.py @@ -0,0 +1,23 @@ +""" +Read and plot a HYSPLIT trajectory file from a HYSPlIT run. +----------------------------------------------------------- + +This example shows how to read and plot a backtrajectory calculated by the NOAA +HYSPLIT model over Houston. + +Author: Robert Jackson +""" + +import act +import matplotlib.pyplot as plt + +from arm_test_data import DATASETS + +# Load the data +filename = DATASETS.fetch('houstonaug300.0summer2010080100') +ds = act.io.read_hysplit(filename) + +# Use the GeographicPlotDisplay object to make the plot +disp = act.plotting.GeographicPlotDisplay(ds) +disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND'], color='k') +plt.show() diff --git a/tests/io/test_hysplit.py b/tests/io/test_hysplit.py new file mode 100644 index 0000000000..a7944232fd --- /dev/null +++ b/tests/io/test_hysplit.py @@ -0,0 +1,18 @@ +import act +import matplotlib.pyplot as plt + +from arm_test_data import DATASETS + + +def test_read_hysplit(): + filename = DATASETS.fetch('houstonaug300.0summer2010080100') + ds = act.io.read_hysplit(filename) + assert 'lat' in ds.variables.keys() + assert 'lon' in ds.variables.keys() + assert 'alt' in ds.variables.keys() + assert 'PRESSURE' in ds.variables.keys() + assert ds.dims["num_grids"] == 8 + assert ds.dims["num_trajectories"] == 1 + assert ds.dims['time'] == 121 + assert ds['age'].min() == -120 + From 6d72ff1a2e3e595ee969a3b7e0ff75d794f9f0f8 Mon Sep 17 00:00:00 2001 From: Bobby Jackson Date: Tue, 23 Jan 2024 10:05:03 -0600 Subject: [PATCH 2/5] ADD: Hysplit to datastream name --- act/io/hysplit.py | 2 +- examples/io/plot_hysplit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/act/io/hysplit.py b/act/io/hysplit.py index e77c595d7c..95d934319a 100644 --- a/act/io/hysplit.py +++ b/act/io/hysplit.py @@ -91,7 +91,7 @@ def read_hysplit(filename, base_year=2000): del input_df["hour"] del input_df["minute"] ds = ds.merge(input_df.to_xarray()) - ds.attrs['_datastream'] = 'hysplit' + ds.attrs['datastream'] = 'hysplit' ds["trajectory_number"].attrs["standard_name"] = "Trajectory number" ds["trajectory_number"].attrs["units"] = "1" ds["grid_number"].attrs["standard_name"] = "Grid number" diff --git a/examples/io/plot_hysplit.py b/examples/io/plot_hysplit.py index 98ea72e4ef..6e53009311 100644 --- a/examples/io/plot_hysplit.py +++ b/examples/io/plot_hysplit.py @@ -19,5 +19,5 @@ # Use the GeographicPlotDisplay object to make the plot disp = act.plotting.GeographicPlotDisplay(ds) -disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND'], color='k') +disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND']) plt.show() From c3e287986d9fe294adb2b96fc399521d4a9d4767 Mon Sep 17 00:00:00 2001 From: Bobby Jackson Date: Tue, 23 Jan 2024 10:11:58 -0600 Subject: [PATCH 3/5] STY: Pep8 and fix hard coded filename --- act/io/hysplit.py | 10 ++++------ tests/io/test_hysplit.py | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/act/io/hysplit.py b/act/io/hysplit.py index 95d934319a..f0c410d0eb 100644 --- a/act/io/hysplit.py +++ b/act/io/hysplit.py @@ -6,6 +6,7 @@ from datetime import datetime from .text import read_csv + def read_hysplit(filename, base_year=2000): """ Reads an input HYSPLIT trajectory for plotting in ACT. @@ -70,17 +71,14 @@ def read_hysplit(filename, base_year=2000): ds["start_altitude"] = xr.DataArray(start_alt, dims=["num_trajectories"]) ds["start_altitude"].attrs["long_name"] = "Trajectory start altitude" ds["start_altitude"].attrs["units"] = "degree" - - data = filebuf.readline().split() num_lines += 1 var_list = ["trajectory_number", "grid_number", "year", "month", "day", - "hour", "minute", "forecast_hour", "age", "lat", "lon", "alt"] + "hour", "minute", "forecast_hour", "age", "lat", "lon", "alt"] for variable in data[1:]: var_list.append(variable) - input_df = pd.read_csv('/Users/rjackson/opencrums/trajectories/houstonaug300.0summer2010080100', - sep='\s+', index_col=False, - names=var_list, skiprows=12) + input_df = pd.read_csv( + filename, sep='\s+', index_col=False, names=var_list, skiprows=12) input_df['year'] = base_year + input_df['year'] input_df['time'] = pd.to_datetime(input_df[["year", "month", "day", "hour", "minute"]], format='%y%m%d%H%M') diff --git a/tests/io/test_hysplit.py b/tests/io/test_hysplit.py index a7944232fd..ad762356a2 100644 --- a/tests/io/test_hysplit.py +++ b/tests/io/test_hysplit.py @@ -15,4 +15,3 @@ def test_read_hysplit(): assert ds.dims["num_trajectories"] == 1 assert ds.dims['time'] == 121 assert ds['age'].min() == -120 - From 494544c219d0bb8a445e780f858c46ad76e9f13c Mon Sep 17 00:00:00 2001 From: Bobby Jackson Date: Tue, 23 Jan 2024 13:49:12 -0600 Subject: [PATCH 4/5] FIX: Add HYSPLIT entry to sample_files --- act/tests/sample_files.py | 1 + examples/io/plot_hysplit.py | 4 ++-- tests/io/test_hysplit.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/act/tests/sample_files.py b/act/tests/sample_files.py index 06adc4a4a2..d1e1194cc5 100644 --- a/act/tests/sample_files.py +++ b/act/tests/sample_files.py @@ -64,6 +64,7 @@ EXAMPLE_OLD_QC = DATASETS.fetch('sgp30ecorE6.b1.20040705.000000.cdf') EXAMPLE_SONDE_WILDCARD = DATASETS.fetch('sgpsondewnpnC1.b1.20190101.053200.cdf') EXAMPLE_CEIL_WILDCARD = DATASETS.fetch('sgpceilC1.b1.20190101.000000.nc') +EXAMPLE_HYSPLIT = DATASETS.fetch('houstonaug300.0summer2010080100') # Multiple files in a list dlppi_multi_list = ['sgpdlppiC1.b1.20191015.120023.cdf', diff --git a/examples/io/plot_hysplit.py b/examples/io/plot_hysplit.py index 6e53009311..24100281f6 100644 --- a/examples/io/plot_hysplit.py +++ b/examples/io/plot_hysplit.py @@ -11,10 +11,10 @@ import act import matplotlib.pyplot as plt -from arm_test_data import DATASETS +from act.tests import sample_files # Load the data -filename = DATASETS.fetch('houstonaug300.0summer2010080100') +filename = sample_files.EXAMPLE_HYSPLIT ds = act.io.read_hysplit(filename) # Use the GeographicPlotDisplay object to make the plot diff --git a/tests/io/test_hysplit.py b/tests/io/test_hysplit.py index ad762356a2..6a889a2f42 100644 --- a/tests/io/test_hysplit.py +++ b/tests/io/test_hysplit.py @@ -1,11 +1,11 @@ import act import matplotlib.pyplot as plt -from arm_test_data import DATASETS +from act.tests import sample_files def test_read_hysplit(): - filename = DATASETS.fetch('houstonaug300.0summer2010080100') + filename = sample_files.EXAMPLE_HYSPLIT ds = act.io.read_hysplit(filename) assert 'lat' in ds.variables.keys() assert 'lon' in ds.variables.keys() From 54d9c9d367c37f6669648b12d2887c12121e0bc4 Mon Sep 17 00:00:00 2001 From: Bobby Jackson Date: Tue, 23 Jan 2024 13:51:59 -0600 Subject: [PATCH 5/5] FIX: Ok, let's try this again. --- examples/io/plot_hysplit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/io/plot_hysplit.py b/examples/io/plot_hysplit.py index 24100281f6..6e53009311 100644 --- a/examples/io/plot_hysplit.py +++ b/examples/io/plot_hysplit.py @@ -11,10 +11,10 @@ import act import matplotlib.pyplot as plt -from act.tests import sample_files +from arm_test_data import DATASETS # Load the data -filename = sample_files.EXAMPLE_HYSPLIT +filename = DATASETS.fetch('houstonaug300.0summer2010080100') ds = act.io.read_hysplit(filename) # Use the GeographicPlotDisplay object to make the plot