Fix the missing SWE (snow water equivalent) values

geo-smart committed on Dec 2, 2022 · 05a934e
1 parent 9e9593a
commit 05a934e
Show file tree

Hide file tree

Showing 37 changed files with 941 additions and 10,292 deletions.
diff --git a/code/data_WUS_UCLA_SR.py b/code/data_WUS_UCLA_SR.py
@@ -2,5 +2,4 @@
 
 print("get UCLA data and prepare it into csv")
 
-# https://nsidc.org/data/wus_ucla_sr/versions/1
-# https://n5eil01u.ecs.nsidc.org/SNOWEX/WUS_UCLA_SR.001/
+
diff --git a/code/data_gee_gridmet_station_only.py b/code/data_gee_gridmet_station_only.py
@@ -1,4 +1,3 @@
-
 import json
 import pandas as pd
 import ee
@@ -99,6 +98,3 @@ def poi_mean(img):
           pass
 
     all_cell_df.to_csv(f"{dfolder}/{column_name}.csv")  
-
-
-
diff --git a/code/data_integration.py b/code/data_integration.py
@@ -11,7 +11,7 @@
 import geojson
 import numpy as np
 import os.path
-from datetime import datetime
+from datetime import datetime,timedelta
 
 print("integrating datasets into one dataset")
 # pd.set_option('display.max_columns', None)
@@ -35,7 +35,11 @@
 training_feature_pd = pd.read_csv(training_feature_file, header=0, index_col=0)
 testing_feature_pd = pd.read_csv(testing_feature_file, header=0, index_col=0)
 train_labels_pd = pd.read_csv(train_labels_file, header=0, index_col=0)
-# print(training_feature_pd.head())
+print(train_labels_pd.head())
+# if "2ca6a37f-67f5-4905-864b-ddf98d956ebb" in train_labels_pd.index and "2013-01-02" in train_labels_pd.columns:
+#   print("Check one value: ", train_labels_pd.loc["2ca6a37f-67f5-4905-864b-ddf98d956ebb"]["2013-01-02"])
+# else:
+#   print("Key not existed")
 
 station_cell_mapper_pd = pd.read_csv(station_cell_mapper_file, header=0, index_col=0)
 
@@ -265,8 +269,9 @@ def prepare_training_csv_nsidc():
     input columns: [m, doy, tmmn, tmmx, pr, vpd, eto, rmax, rmin, vs]
     output column: [swe]
   """
-    all_ready_file = f"{github_dir}/data/ready_for_training/all_ready.csv"
+    all_ready_file = f"{github_dir}/data/ready_for_training/all_ready_new.csv"
     if os.path.isfile(all_ready_file):
+        print("The file already exists. Exiting..")
         return
     all_gridmet_eto_file = f"{github_dir}/data/ready_for_training/gridmet_eto_all.csv"
     gridmet_eto_all_pd = pd.read_csv(all_gridmet_eto_file, header=0, index_col=0)
@@ -301,14 +306,14 @@ def prepare_training_csv_nsidc():
     print("testing_feature_pd size: ", testing_feature_pd.shape)
     all_valid_columns = gridmet_eto_all_pd.columns.values
     all_training_pd = pd.DataFrame(
-        columns=["cell_id", "year", "m", "doy", "eto", "pr", "rmax", "rmin", "tmmn", "tmmx", "vpd", "vs", "lat", "lon",
-                 "elevation", "aspect", "curvature", "slope", "eastness", "northness", "swe", "depth"])
+        columns=["cell_id", "year", "m", "day", "eto", "pr", "rmax", "rmin", "tmmn", "tmmx", "vpd", "vs", "lat", "lon",
+                 "elevation", "aspect", "curvature", "slope", "eastness", "northness", "swe_0719", "depth_0719", "swe_snotel"])
     all_training_pd = all_training_pd.reset_index()
     for index, row in nsidc_all_pd.iterrows():
         month = row['Month']
         year = row['Year']
-        doy = row['Day']
-        print(f"Dealing {year} {month} {doy}")
+        day = row['Day']
+        print(f"Dealing {year} {month} {day}")
         lat = row['Lat']
         lon = row['Lon']
         print("lat lon: ", lat, " ", lon)
@@ -332,8 +337,18 @@ def prepare_training_csv_nsidc():
         slope = grid_terrain_pd.loc[ind, "Slope [deg]"]
         eastness = grid_terrain_pd.loc[ind, "Eastness [unitCirc.]"]
         northness = grid_terrain_pd.loc[ind, "Northness [unitCirc.]"]
+        cdate = datetime(year=int(year), month=int(month), day=int(day))
+        current_date = cdate.strftime("%Y-%m-%d")
+
+        if cell_id in train_labels_pd.index and current_date in train_labels_pd.columns:
+#           print("Check one value: ", train_labels_pd.loc[cell_id][current_date])
+          swe_snotel = train_labels_pd.loc[cell_id][current_date]
+        else:
+          swe_snotel = -1
+#           print("Key not existed")
+
         if not np.isnan(swe):
-            json_kv = {"cell_id":cell_id,"year":year, "m":month, "doy": doy, "eto":eto, "pr":pr, "rmax":rmax, "rmin":rmin, "tmmn":tmmn, "tmmx":tmmx, "vpd":vpd, "vs":vs, "lat":lat, "lon":lon, "elevation":elevation, "aspect":aspect, "curvature":curvature, "slope":slope, "eastness":eastness, "northness":northness, "swe":swe, "depth":depth}
+            json_kv = {"cell_id":cell_id,"year":year, "m":month, "day": day, "eto":eto, "pr":pr, "rmax":rmax, "rmin":rmin, "tmmn":tmmn, "tmmx":tmmx, "vpd":vpd, "vs":vs, "lat":lat, "lon":lon, "elevation":elevation, "aspect":aspect, "curvature":curvature, "slope":slope, "eastness":eastness, "northness":northness, "swe_0719":swe, "depth_0719":depth, "swe_snotel": swe_snotel}
             print(json_kv)
             all_training_pd = all_training_pd.append(json_kv, ignore_index=True)
             print(all_training_pd.shape)

diff --git a/code/data_nsidc_4km_swe.py b/code/data_nsidc_4km_swe.py
@@ -35,7 +35,7 @@
 # crs, lat, lon, time, time_str, DEPTH, SWE, SWE_MASK
 # change to make it work
 end_year = 2019
-nsidc_data_file = f"{homedir}/Documents/Geoweaver/4km_SWE_Depth_WY{end_year}_v01.nc"
+nsidc_data_file = f"{homedir}/Documents/data/4km_SWE_Depth_WY{end_year}_v01.nc"
 nsidc_data_ds = nc.Dataset(nsidc_data_file)
 
 print(nsidc_data_ds)
@@ -174,4 +174,3 @@ def find_nearest_2(find_lat, find_lng):
 all_cells_df.to_csv(f"{dfolder}/{end_year}nsidc_data.csv")
 
 print("finished")
-
diff --git a/code/model_creation_rf.py b/code/model_creation_rf.py
@@ -63,5 +63,3 @@ def evaluate(self):
     print('R2 score is {}'.format(r2))
     print('RMSE is {}'.format(rmse))
     return {"mae":mae, "mse": mse, "r2": r2, "rmse": rmse}
-
-
diff --git a/code/process.json b/code/process.json
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,5 +2,4 @@

		print("get UCLA data and prepare it into csv")

		# https://nsidc.org/data/wus_ucla_sr/versions/1
		# https://n5eil01u.ecs.nsidc.org/SNOWEX/WUS_UCLA_SR.001/
Original file line number	Diff line number	Diff line change
Expand Up		@@ -63,5 +63,3 @@ def evaluate(self):
		print('R2 score is {}'.format(r2))
		print('RMSE is {}'.format(rmse))
		return {"mae":mae, "mse": mse, "r2": r2, "rmse": rmse}