-
Notifications
You must be signed in to change notification settings - Fork 3
/
xugrid_test1.py
117 lines (82 loc) · 4.03 KB
/
xugrid_test1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#========================================================
import s3fs # Importing the s3fs library for accessing S3 buckets
import xarray as xr # Importing xarray library for working with multidimensional arrays
import time # Importing the time library for recording execution time
import xugrid as xu # Importing xugrid library for working with unstructured grids
#========================================================
def read_netcdf_from_s3(bucket_name, key, drop_variables=("nvel",), anon=True):
    """
    Open a NetCDF file stored in an S3 bucket as an xarray Dataset.

    Parameters:
    - bucket_name: Name of the S3 bucket
    - key: Key/path to the NetCDF file in the bucket
    - drop_variables: Name, or iterable of names, of variables to skip when
      opening the file. Defaults to ("nvel",), which is what this function
      dropped before the parameter existed; pass () or None to keep
      every variable.
    - anon: Passed to s3fs.S3FileSystem; True (default) requests anonymous
      access to the bucket

    Returns:
    - ds: xarray Dataset containing the NetCDF data
    """
    # Normalise the argument to a list of names. A bare string must be wrapped
    # first, otherwise list() would split it into single characters.
    if drop_variables is None:
        drop_variables = ()
    elif isinstance(drop_variables, str):
        drop_variables = (drop_variables,)

    s3 = s3fs.S3FileSystem(anon=anon)
    url = f"s3://{bucket_name}/{key}"

    # The remote file object is intentionally not closed here.
    # NOTE(review): xarray presumably keeps reading from it lazily after this
    # function returns - confirm before adding a close()/with block.
    ds = xr.open_dataset(s3.open(url, 'rb'), drop_variables=list(drop_variables))
    return ds
#========================================================
def subset_ugrid(ds, box):
    """
    Select the part of an unstructured grid dataset that lies in a bounding box.

    Parameters:
    - ds: Dataset exposing the `ugrid` accessor (e.g. an xugrid UgridDataset)
    - box: Indexable bounding box laid out as (x_min, x_max, y_min, y_max)

    Returns:
    - Whatever `ds.ugrid.sel` returns for the x and y ranges taken from box
    """
    # box stores the x bounds first, then the y bounds.
    y_range = slice(box[2], box[3])
    x_range = slice(box[0], box[1])
    return ds.ugrid.sel(y=y_range, x=x_range)
#========================================================
def save_subset_to_netcdf(xarray_ds, output_file):
    """
    Write a dataset to a NetCDF file through its `ugrid` accessor.

    Parameters:
    - xarray_ds: Dataset exposing the `ugrid` accessor (the subset to save)
    - output_file: Path of the NetCDF file to write

    Returns:
    - None; the value returned by `to_netcdf` is discarded
    """
    ugrid_accessor = xarray_ds.ugrid
    ugrid_accessor.to_netcdf(output_file)
#========================================================
def convert_to_xarray(dataset):
    """
    Rebuild a dataset as a plain xarray Dataset.

    Each entry of `dataset.variables` is re-created from its dimension names,
    its `.values` and its attributes, so per-variable metadata is carried over
    to the result together with the dataset-level attributes.

    Parameters:
    - dataset: Input dataset to convert. It must expose `variables` (a mapping
      of name to an object with `dims` and `values`, and optionally `attrs`)
      and `attrs`.

    Returns:
    - xarray_ds: Converted xarray Dataset
    """
    # xarray accepts variables as (dims, data, attrs) tuples. The attrs are
    # copied into a fresh dict so the result does not share them with the
    # input; a variable without attrs simply gets an empty dict.
    xarray_data = {
        varname: (
            variable.dims,
            variable.values,
            dict(getattr(variable, "attrs", None) or {}),
        )
        for varname, variable in dataset.variables.items()
    }
    xarray_ds = xr.Dataset(xarray_data, attrs=dataset.attrs)
    return xarray_ds
#========================================================
# Testing xugrid library: read a NetCDF file from S3, wrap it as a
# UgridDataset, subset it to a bounding box and write the subset to disk,
# printing how long each stage takes.
#
# Intervals are measured with time.perf_counter(), a monotonic clock meant for
# timing, so a system clock adjustment cannot distort the reported durations.

# Inputs and outputs are defined up front so that no setup work is included
# in any of the timed sections.
bucket_name = 'noaa-gestofs-pds'
key = '_para3/stofs_2d_glo.20231130/stofs_2d_glo.t00z.fields.cwl.nc'
# Bounding box as (x_min, x_max, y_min, y_max), the layout subset_ugrid expects
box = (-70, -60, 40, 50)
output_file = 'stofs_subset1.nc'

# Stage 1: read the NetCDF data from the S3 bucket
start_time = time.perf_counter()
dataset = read_netcdf_from_s3(bucket_name, key)
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time for reading data: {execution_time} seconds")

# Stage 2: wrap the dataset as an xugrid UgridDataset
start_time = time.perf_counter()
uds = xu.UgridDataset(dataset)
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time for converting to Ugrid: {execution_time} seconds")

# Stage 3: subset the Ugrid dataset to the bounding box
start_time = time.perf_counter()
uds2 = subset_ugrid(uds, box)
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time for subsetting: {execution_time} seconds")

# Stage 4: write the subset to a NetCDF file
start_time = time.perf_counter()
save_subset_to_netcdf(uds2, output_file)
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time for writing: {execution_time} seconds")
#========================================================