Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix date issue by switching to converting UV data using location-based timezones #73

Merged
merged 9 commits into from
Sep 26, 2022
6 changes: 5 additions & 1 deletion 0_historic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ targets:
'0_historic/src/do_historic_gw_fetch.R',
'0_historic/src/fetch_nwis_historic.R')
# Task table for `uv` includes a step to average the data to daily values.
# This step needs the timezones for all the sites.
historic_gw_sites_uv_tz:
command: fetch_gw_site_tz(historic_gw_sites_uv)
0_historic/out/historic_gw_data_uv.csv:
command: do_historic_gw_fetch(
final_target = target_name,
Expand All @@ -55,7 +58,8 @@ targets:
service_cd = I('uv'),
request_limit = historic_uv_fetch_size_limit,
'0_historic/src/do_historic_gw_fetch.R',
'0_historic/src/fetch_nwis_historic.R')
'0_historic/src/fetch_nwis_historic.R',
gw_site_tz_xwalk_nm = I('historic_gw_sites_uv_tz'))

# Special pull for just KS & just FL using `62610`. Plus, HI using `72150`.
# Read all about why on GitHub: https://github.com/USGS-VIZLAB/gw-conditions/issues/9
Expand Down
16 changes: 14 additions & 2 deletions 0_historic/src/do_historic_addl_param_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ do_historic_addl_param_fetch <- function(final_target, addl_states, addl_gw_para
steps[["identify_states"]]$target_name, task_name)
}
)

# Need local timezones for averaging to daily values
create_addl_sites_tz_xwalk <- create_task_step(
step_name = 'sites_tz_xwalk',
target_name = function(task_name, ...) {
sprintf('addl_sites_tz_xwalk_%s', task_name)
},
command = function(..., task_name, steps) {
sprintf("fetch_gw_site_tz(%s)", steps[['inventory_sites']]$target_name)
}
)

download_addl_data <- create_task_step(
step_name = 'download_addl_data',
Expand All @@ -51,14 +62,15 @@ do_historic_addl_param_fetch <- function(final_target, addl_states, addl_gw_para
"request_limit = historic_uv_fetch_size_limit,",
"'0_historic/src/do_historic_gw_fetch.R',",
"'0_historic/src/fetch_nwis_historic.R',",
"include_ymls = I('%s'))" = task_makefile)
"include_ymls = I('%s')," = task_makefile,
"gw_site_tz_xwalk_nm = I('%s'))" = steps[['sites_tz_xwalk']]$target_name)
}
)

# Create the task plan
task_plan <- create_task_plan(
task_names = tasks,
task_steps = list(identify_states, inventory_sites, download_addl_data),
task_steps = list(identify_states, inventory_sites, create_addl_sites_tz_xwalk, download_addl_data),
final_steps = "download_addl_data",
add_complete = FALSE)

Expand Down
7 changes: 5 additions & 2 deletions 0_historic/src/do_historic_gw_fetch.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
do_historic_gw_fetch <- function(final_target, task_makefile, gw_site_nums, gw_site_nums_obj_nm, param_cd, service_cd, request_limit, ..., include_ymls = NULL) {
do_historic_gw_fetch <- function(final_target, task_makefile, gw_site_nums,
gw_site_nums_obj_nm, param_cd, service_cd, request_limit, ...,
include_ymls = NULL, gw_site_tz_xwalk_nm = NULL) {

# Number indicating how many sites to include per dataRetrieval request to prevent
# errors from requesting too much at once. More relevant for surface water requests.
Expand Down Expand Up @@ -63,7 +65,8 @@ do_historic_gw_fetch <- function(final_target, task_makefile, gw_site_nums, gw_s
command = function(..., task_name, steps) {
psprintf("convert_uv_to_dv(",
"target_name = target_name,",
"gw_uv_data_fn = '%s')" = steps[["download_data"]]$target_name)
"gw_uv_data_fn = '%s'," = steps[["download_data"]]$target_name,
"site_tz_xwalk = %s)" = gw_site_tz_xwalk_nm)
}
)
task_steps <- c(task_steps, list(average_data))
Expand Down
108 changes: 105 additions & 3 deletions 0_historic/src/fetch_nwis_historic.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ pull_sites_by_service <- function(site_df, service) {
site_df %>% filter(data_type_cd == service) %>% pull(site_no)
}

# Look up the local timezone code for each NWIS site.
#
# @param sites Character vector of NWIS site numbers.
# @return Data frame with one row per site and the columns `site_no`
#   and `tz_cd` (the site's timezone code as reported by NWIS).
fetch_gw_site_tz <- function(sites) {
  site_info <- readNWISsite(sites)
  select(site_info, site_no, tz_cd)
}

# Fetch NWIS site metadata for every site present in a previously
# downloaded groundwater data file.
#
# @param data_fn Path to a CSV containing at least a `site_no` column
#   (read with col_types = 'cDn': character, Date, numeric).
# @return Data frame of site info with columns `site_no`, `station_nm`,
#   `state_cd`, `dec_lat_va`, `dec_long_va`, and `tz_cd`.
fetch_gw_site_info <- function(data_fn) {
  gw_data <- read_csv(data_fn, col_types = 'cDn')
  unique_sites <- unique(gw_data$site_no)
  site_info <- readNWISsite(unique_sites)
  select(site_info, site_no, station_nm, state_cd, dec_lat_va, dec_long_va, tz_cd)
}

fetch_addl_uv_sites <- function(addl_states, param_cd, start_date, end_date) {
Expand Down Expand Up @@ -93,14 +98,111 @@ fetch_gw_historic_uv <- function(target_name, gw_sites, start_date, end_date, pa
write_feather(target_name)
}

convert_uv_to_dv <- function(target_name, gw_uv_data_fn) {
# Average instantaneous (uv) groundwater levels to daily values, assigning
# each timestamp to a calendar date in the site's *local* timezone rather
# than UTC so that readings near midnight land on the correct day.
#
# @param target_name Output feather filename.
# @param gw_uv_data_fn Feather file of instantaneous data with columns
#   `site_no`, `dateTime` (POSIXct), `GWL_inst`, and a `tz_cd` column as
#   downloaded by dataRetrieval (replaced here by the xwalk's value).
# @param site_tz_xwalk Data frame with columns `site_no` and `tz_cd`
#   giving each site's local timezone code.
# @return Result of write_feather(); called for its side effect of
#   writing the daily-value feather file.
convert_uv_to_dv <- function(target_name, gw_uv_data_fn, site_tz_xwalk) {
  read_feather(gw_uv_data_fn) %>%

    ### Convert to date using local timezone

    # Remove the tz_cd column that is downloaded with the data so that the
    # join below is the single source of timezone truth
    select(-tz_cd) %>%
    # Join in the local timezone information for each site
    left_join(site_tz_xwalk, by = "site_no") %>%
    # Change the dateTime column to just the date. Need to use the timezone
    # rather than UTC so that days are treated appropriately. Seems overly
    # complicated but you can't use `format()` with more than one tz, so
    # this vectorizes that call per timezone. Not using rowwise() because
    # that would be slow. Doing a single call per timezone speeds this up.
    group_by(tz_cd) %>%
    nest() %>%
    pmap(function(tz_cd, data) {
      data %>% mutate(Date = POSIXct_to_Date_tz(dateTime, tz_cd))
    }) %>%
    bind_rows() %>%

    ### Reduce each instantaneous value to a single average for each date
    group_by(site_no, Date) %>%
    summarize(GWL = mean(GWL_inst, na.rm = TRUE), .groups = "keep") %>%
    write_feather(target_name)
}

# Convert POSIXct to Dates for a given timezone
# Only works with one tz_abbr at a time
# Convert POSIXct timestamps to Dates for a given timezone.
# Only works with one tz_abbr at a time (callers group data by timezone
# and invoke this once per group).
#
# @param posix_dates POSIXct vector of timestamps.
# @param tz_abbr NWIS timezone abbreviation for the site, e.g. "EST" or
#   "PDT". Unrecognized codes are passed through to `format()` unchanged.
# @return Date vector: the calendar date of each timestamp expressed in
#   the site's timezone, adjusted back to the standard/daylight variant
#   named by `tz_abbr` (see adjust_for_daylight_savings()).
POSIXct_to_Date_tz <- function(posix_dates, tz_abbr) {
  # The "AST" for "Atlantic Standard Time" is not recognized by `format()`
  # According to https://www.r-bloggers.com/2018/07/a-tour-of-timezones-troubles-in-r/
  # we should be using location-based timezones to properly handle daylight savings time
  # Not going to worry about the Indiana and Phoenix nuances for now.
  tz_abbr_adj <- switch(
    tz_abbr,
    "AST" = "America/Virgin",
    "EST" = "America/New_York",
    "EDT" = "America/New_York",
    "CST" = "America/Chicago",
    "CDT" = "America/Chicago",
    "MST" = "America/Denver",
    "MDT" = "America/Denver",
    "PST" = "America/Los_Angeles",
    "PDT" = "America/Los_Angeles",
    "AKST" = "America/Juneau",
    "AKDT" = "America/Juneau",
    "HST" = "US/Hawaii",
    "HDT" = "US/Hawaii",
    tz_abbr) # default: pass unrecognized codes straight through

  # Needs to retain POSIXct class and timestamp for extracting tz with '%Z' next
  format(posix_dates, "%Y-%m-%d %H:%M:%S", tz=tz_abbr_adj, usetz=TRUE) %>%
    as.POSIXct(tz=tz_abbr_adj) %>%
    # For some reason, timezones above will only return daylight time,
    # though it might have to do with whether your computer is in
    # daylight or standard time at the moment you run the conversion
    # code. I believe that the function below will appropriately account
    # for that because it will test ST vs DT and do the appropriate switch.
    adjust_for_daylight_savings(tz_desired = tz_abbr) %>%
    # Drop times and timezone before converting to a plain date or it will
    # adjust using your local timezone.
    format('%Y-%m-%d') %>% as.Date()

}

# Note that the output from this fxn will say 'PDT' but mean 'PST'
# because you can't have a timezone of 'PST' (it will convert to GMT,
# even when using `lubridate::force_tz(., 'PST')`). This is used
# internally before dropping time and going to a day, so I am
# accepting the risk.
# Shift POSIXct values between standard time (ST) and daylight time (DT)
# so that their clock time matches the variant named by `tz_desired`.
#
# Note that the output from this fxn will say 'PDT' but mean 'PST'
# because you can't have a timezone of 'PST' (it will convert to GMT,
# even when using `lubridate::force_tz(., 'PST')`). This is used
# internally before dropping time and going to a day, so I am
# accepting the risk.
#
# @param posix_dates POSIXct vector whose current tz label (via '%Z')
#   identifies whether each value is in ST or DT.
# @param tz_desired Timezone abbreviation whose last two characters are
#   "ST" or "DT", e.g. "EST" or "PDT".
# @return POSIXct vector shifted by +/- 3600 seconds (or 0) per element.
adjust_for_daylight_savings <- function(posix_dates, tz_desired) {

  # To go from daylight time (DT) to standard time
  # (ST), subtract an hour and vice versa. If the
  # `from` and `to` values are the same, don't
  # change anything about the dates.
  tz_conversion_xwalk <- tibble(
    from = c('DT', 'ST', 'ST', 'DT'),
    to = c('ST', 'DT', 'ST', 'DT'),
    conversion_sec = c(-3600, 3600, 0, 0)
  )

  # There could be more than one timezone if the date range spans across
  # the standard to daylight savings switch. Thus, we should be able to
  # convert each date independently (which happens in this piped sequence)
  tibble(
    in_dates = posix_dates,
    in_tz = format(posix_dates, "%Z")
  ) %>%
    mutate(
      # Use the last two characters in both the current and desired
      # timezones for matching with the conversion xwalk
      from = stringr::str_sub(in_tz, -2, -1),
      to = stringr::str_sub(tz_desired, -2, -1)
    ) %>%
    # Join in conversion xwalk; `by` is explicit so the join cannot
    # silently pick up extra columns and doesn't emit a "Joining, by ="
    # message on every call
    left_join(tz_conversion_xwalk, by = c("from", "to")) %>%
    # Alter the date values to match the desired timezone.
    mutate(out_dates = in_dates + conversion_sec) %>%
    # Pull out just the dates to return
    pull(out_dates)

}

combine_gw_fetches <- function(target_name, dv_fn, uv_fn, uv_addl_fn) {
read_csv(dv_fn, col_types = 'cDn') %>%
bind_rows(read_csv(uv_fn, col_types = 'cDn')) %>%
Expand Down
6 changes: 5 additions & 1 deletion 1_fetch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ targets:
- viz_end_date

# Task table for `uv` includes a step to average the data to daily values.
# This step needs the timezones for all the sites.
gw_quantile_site_tz_xwalk:
command: select(gw_quantile_site_info, I('site_no'), I('tz_cd'))
1_fetch/out/gw_data_uv.csv:
command: do_gw_fetch(
final_target = target_name,
Expand All @@ -81,7 +84,8 @@ targets:
service_cd = I('uv'),
request_limit = uv_fetch_size_limit,
'1_fetch/src/do_gw_fetch.R',
'1_fetch/src/fetch_nwis.R')
'1_fetch/src/fetch_nwis.R',
gw_site_tz_xwalk_nm = I('gw_quantile_site_tz_xwalk'))
depends:
- viz_start_date
- viz_end_date
Expand Down
3 changes: 2 additions & 1 deletion 1_fetch/src/do_addl_gw_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ do_addl_gw_fetch <- function(final_target, addl_param_cds) {
"request_limit = uv_fetch_size_limit,",
"'1_fetch/src/do_gw_fetch.R',",
"'1_fetch/src/fetch_nwis.R',",
"include_ymls = I('%s'))" = task_makefile)
"include_ymls = I('%s')," = task_makefile,
"gw_site_tz_xwalk_nm = I('gw_quantile_site_tz_xwalk'))")
}
)

Expand Down
6 changes: 4 additions & 2 deletions 1_fetch/src/do_gw_fetch.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
do_gw_fetch <- function(final_target, task_makefile, gw_site_nums, gw_site_nums_obj_nm,
param_cd, service_cd, request_limit, ..., include_ymls = NULL) {
param_cd, service_cd, request_limit, ..., include_ymls = NULL,
gw_site_tz_xwalk_nm = NULL) {

# Number indicating how many sites to include per dataRetrieval request to prevent
# errors from requesting too much at once. More relevant for surface water requests.
Expand Down Expand Up @@ -57,7 +58,8 @@ do_gw_fetch <- function(final_target, task_makefile, gw_site_nums, gw_site_nums_
command = function(..., task_name, steps) {
psprintf("convert_uv_to_dv(",
"target_name = target_name,",
"gw_uv_data_fn = '%s')" = steps[["download_data"]]$target_name)
"gw_uv_data_fn = '%s'," = steps[["download_data"]]$target_name,
"site_tz_xwalk = %s)" = gw_site_tz_xwalk_nm)
}
)
task_steps <- c(task_steps, list(average_data))
Expand Down
100 changes: 98 additions & 2 deletions 1_fetch/src/fetch_nwis.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,110 @@ fetch_gw_uv <- function(target_name, gw_sites, start_date, end_date, param_cd) {
write_feather(target_name)
}

# Average instantaneous (uv) groundwater levels to daily values, assigning
# each timestamp to a calendar date in the site's *local* timezone rather
# than UTC so that readings near midnight land on the correct day.
#
# NOTE(review): this function is duplicated in
# 0_historic/src/fetch_nwis_historic.R; see issue #74 about consolidating.
#
# @param target_name Output feather filename.
# @param gw_uv_data_fn Feather file of instantaneous data with columns
#   `site_no`, `dateTime` (POSIXct), `GWL_inst`, and a `tz_cd` column as
#   downloaded by dataRetrieval (replaced here by the xwalk's value).
# @param site_tz_xwalk Data frame with columns `site_no` and `tz_cd`
#   giving each site's local timezone code.
# @return Result of write_feather(); called for its side effect of
#   writing the daily-value feather file.
convert_uv_to_dv <- function(target_name, gw_uv_data_fn, site_tz_xwalk) {
  read_feather(gw_uv_data_fn) %>%

    ### Convert to date using local timezone

    # Remove the tz_cd column that is downloaded with the data so that the
    # join below is the single source of timezone truth
    select(-tz_cd) %>%
    # Join in the local timezone information for each site
    left_join(site_tz_xwalk, by = "site_no") %>%
    # Change the dateTime column to just the date. Need to use the timezone
    # rather than UTC so that days are treated appropriately. Seems overly
    # complicated but you can't use `format()` with more than one tz, so
    # this vectorizes that call per timezone. Not using rowwise() because
    # that would be slow. Doing a single call per timezone speeds this up.
    group_by(tz_cd) %>%
    nest() %>%
    pmap(function(tz_cd, data) {
      data %>% mutate(Date = POSIXct_to_Date_tz(dateTime, tz_cd))
    }) %>%
    bind_rows() %>%

    ### Reduce each instantaneous value to a single average for each date
    group_by(site_no, Date) %>%
    # `.groups = "keep"` matches the historic version of this function and
    # silences dplyr's regrouping message
    summarize(GWL = mean(GWL_inst, na.rm = TRUE), .groups = "keep") %>%
    write_feather(target_name)
}

# Convert POSIXct to Dates for a given timezone
# Only works with one tz_abbr at a time
# Convert POSIXct timestamps to Dates for a given timezone.
# Only works with one tz_abbr at a time (callers group data by timezone
# and invoke this once per group).
#
# NOTE(review): duplicated in 0_historic/src/fetch_nwis_historic.R; see
# issue #74 about consolidating.
#
# @param posix_dates POSIXct vector of timestamps.
# @param tz_abbr NWIS timezone abbreviation for the site, e.g. "EST" or
#   "PDT". Unrecognized codes are passed through to `format()` unchanged.
# @return Date vector: the calendar date of each timestamp expressed in
#   the site's timezone, adjusted back to the standard/daylight variant
#   named by `tz_abbr` (see adjust_for_daylight_savings()).
POSIXct_to_Date_tz <- function(posix_dates, tz_abbr) {
  # The "AST" for "Atlantic Standard Time" is not recognized by `format()`
  # According to https://www.r-bloggers.com/2018/07/a-tour-of-timezones-troubles-in-r/
  # we should be using location-based timezones to properly handle daylight savings time
  # Not going to worry about the Indiana and Phoenix nuances for now.
  tz_abbr_adj <- switch(
    tz_abbr,
    "AST" = "America/Virgin",
    "EST" = "America/New_York",
    "EDT" = "America/New_York",
    "CST" = "America/Chicago",
    "CDT" = "America/Chicago",
    "MST" = "America/Denver",
    "MDT" = "America/Denver",
    "PST" = "America/Los_Angeles",
    "PDT" = "America/Los_Angeles",
    "AKST" = "America/Juneau",
    "AKDT" = "America/Juneau",
    "HST" = "US/Hawaii",
    "HDT" = "US/Hawaii",
    tz_abbr) # default: pass unrecognized codes straight through

  # Needs to retain POSIXct class and timestamp for extracting tz with '%Z' next
  format(posix_dates, "%Y-%m-%d %H:%M:%S", tz=tz_abbr_adj, usetz=TRUE) %>%
    as.POSIXct(tz=tz_abbr_adj) %>%
    # For some reason, timezones above will only return daylight time,
    # though it might have to do with whether your computer is in
    # daylight or standard time at the moment you run the conversion
    # code. I believe that the function below will appropriately account
    # for that because it will test ST vs DT and do the appropriate switch.
    adjust_for_daylight_savings(tz_desired = tz_abbr) %>%
    # Drop times and timezone before converting to a plain date or it will
    # adjust using your local timezone.
    format('%Y-%m-%d') %>% as.Date()
}

# Note that the output from this fxn will say 'PDT' but mean 'PST'
# because you can't have a timezone of 'PST' (it will convert to GMT,
# even when using `lubridate::force_tz(., 'PST')`). This is used
# internally before dropping time and going to a day, so I am
# accepting the risk.
# Shift POSIXct values between standard time (ST) and daylight time (DT)
# so that their clock time matches the variant named by `tz_desired`.
#
# Note that the output from this fxn will say 'PDT' but mean 'PST'
# because you can't have a timezone of 'PST' (it will convert to GMT,
# even when using `lubridate::force_tz(., 'PST')`). This is used
# internally before dropping time and going to a day, so I am
# accepting the risk.
#
# @param posix_dates POSIXct vector whose current tz label (via '%Z')
#   identifies whether each value is in ST or DT.
# @param tz_desired Timezone abbreviation whose last two characters are
#   "ST" or "DT", e.g. "EST" or "PDT".
# @return POSIXct vector shifted by +/- 3600 seconds (or 0) per element.
adjust_for_daylight_savings <- function(posix_dates, tz_desired) {

  # To go from daylight time (DT) to standard time
  # (ST), subtract an hour and vice versa. If the
  # `from` and `to` values are the same, don't
  # change anything about the dates.
  tz_conversion_xwalk <- tibble(
    from = c('DT', 'ST', 'ST', 'DT'),
    to = c('ST', 'DT', 'ST', 'DT'),
    conversion_sec = c(-3600, 3600, 0, 0)
  )

  # There could be more than one timezone if the date range spans across
  # the standard to daylight savings switch. Thus, we should be able to
  # convert each date independently (which happens in this piped sequence)
  tibble(
    in_dates = posix_dates,
    in_tz = format(posix_dates, "%Z")
  ) %>%
    mutate(
      # Use the last two characters in both the current and desired
      # timezones for matching with the conversion xwalk
      from = stringr::str_sub(in_tz, -2, -1),
      to = stringr::str_sub(tz_desired, -2, -1)
    ) %>%
    # Join in conversion xwalk; `by` is explicit so the join cannot
    # silently pick up extra columns and doesn't emit a "Joining, by ="
    # message on every call
    left_join(tz_conversion_xwalk, by = c("from", "to")) %>%
    # Alter the date values to match the desired timezone.
    mutate(out_dates = in_dates + conversion_sec) %>%
    # Pull out just the dates to return
    pull(out_dates)

}

combine_gw_fetches <- function(target_name, dv_fn, uv_fn, uv_addl_fn) {
read_csv(dv_fn, col_types = 'cDn') %>%
bind_rows(read_csv(uv_fn, col_types = 'cDn')) %>%
Expand Down
2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_data_filtered.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 991b0a39bf0916d45c3e883dba0ab760
hash: 2975ba5bc35acc29f9a51d463594190c

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_data_unfiltered.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 5f41027b53594a6cff5007cfe775811a
hash: eda380bbd7f3f384c3b1719035bd80f9

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_quantiles.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 159c709725092b140fe0b8f3eb37d845
hash: bc49e934336aa5a31262e97675b3e8d5

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_site_info_filtered.rds.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 48114fcc5818e5c82ba45c06f0174308
hash: 2ee7ab3937551ebd1cfc8f5e45d2e54e

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_site_info_unfiltered.rds.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: aab93b170f7889cbcf92f201b4b47165
hash: 674be56e47bc7020e9cbbb62f161a54f