update docs for area intersection weights for #102 #103

Merged: 8 commits, Nov 19, 2024
64 changes: 21 additions & 43 deletions .github/workflows/R-CMD-check.yml
@@ -10,59 +10,37 @@ name: R-CMD-check

jobs:
R-CMD-check:
runs-on: ubuntu-20.04
runs-on: ${{ matrix.config.os }}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
_R_CHECK_TESTS_NLINES_: 0
NOT_CRAN: true
strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-r@v2

- name: Query dependencies
run: |
install.packages('remotes')
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}
- uses: r-lib/actions/setup-pandoc@v2

- name: Cache R packages
if: runner.os != 'Windows'
uses: actions/cache@v2
- uses: r-lib/actions/setup-r@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install system dependencies
if: runner.os == 'Linux'
env:
RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
run: |
Rscript -e "remotes::install_github('r-hub/sysreqs')"
sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
sudo -s eval "$sysreqs"

- name: Install dependencies
run: |
install.packages(c("remotes"))
remotes::install_deps(dependencies = TRUE)
remotes::install_cran("rcmdcheck")
remotes::install_cran("covr")
shell: Rscript {0}

- name: Check
run: rcmdcheck::rcmdcheck(args = c("--ignore-vignettes", "--no-manual"), build_args = c("--no-build-vignettes"), error_on = "error", check_dir = "check")
shell: Rscript {0}
r-version: ${{ matrix.config.r }}
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck, any::covr
needs: check, coverage

- name: Upload check results
if: failure()
uses: actions/upload-artifact@v3
- uses: r-lib/actions/check-r-package@v2
with:
name: results
path: check
upload-snapshots: true

- name: Test coverage
run: covr::codecov()
shell: Rscript {0}
shell: Rscript {0}
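
For local verification of the consolidated CI steps, a rough R equivalent is sketched below. This is an assumption-based sketch, not part of the workflow: it presumes the working directory is the package root and that packages can be installed; coverage upload via covr::codecov() is omitted because it needs a token outside CI.

# sketch: approximate local equivalent of the check and coverage CI steps
install.packages(c("rcmdcheck", "covr"))

# run R CMD check without the manual, raising an error if the check errors
rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")

# compute package test coverage locally instead of uploading it
covr::package_coverage()
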
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: ncdfgeom
Type: Package
Title: 'NetCDF' Geometry and Time Series
Version: 1.2.0
Version: 1.3.0
Authors@R: c(person("David", "Blodgett", role = c("aut", "cre"),
email = "dblodgett@usgs.gov"),
person("Luke", "Winslow", role = "ctb"))
@@ -15,4 +15,4 @@ Suggests: testthat, knitr, rmarkdown, pkgdown, jsonlite, zip, ncdf4, nhdplusTool
License: CC0
Encoding: UTF-8
VignetteBuilder: knitr
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
203 changes: 84 additions & 119 deletions R/calculate_area_intersection_weights.R
@@ -1,140 +1,106 @@
#' Area Weighted Intersection (areal implementation)
#' Area Weighted Intersection
#' @description Returns the fractional percent of each
#' feature in x that is covered by each intersecting feature
#' in y. These can be used as the weights in an area-weighted
#' mean overlay analysis where x is the data source and area-
#' weighted means are being generated for the target, y.
#' mean overlay analysis where x is the data **source** and area-
#' weighted means are being generated for the **target**, y.
#'
#' This function is a lightweight wrapper around the functions
#' \link[areal]{aw_intersect} \link[areal]{aw_total} and \link[areal]{aw_weight}
#' from the \href{https://chris-prener.github.io/areal/}{areal package}.
#'
#' @param x sf data.frame source features including one geometry column and one identifier column
#' @param y sf data.frame target features including one geometry column and one identifier column
#' @param normalize logical return normalized weights or not. See details and examples.
#' @param normalize logical return normalized weights or not.
#'
#' Normalized weights express the fraction of **target** polygons covered by
#' a portion of each **source** polygon. They are normalized in that the area
#' of each **source** polygon has already been factored into the weight.
#'
#' Un-normalized weights express the fraction of **source** polygons covered by
#' a portion of each **target** polygon. This is a more general form that requires
#' knowledge of the area of each **source** polygon to derive area-weighted
#' statistics from **source** to **target**.
#'
#' See details and examples for more regarding this distinction.
#'
#' @param allow_lonlat boolean If FALSE (the default) lon/lat target features are not allowed.
#' Intersections in lon/lat are generally not valid and are problematic at the international date line.
#'
#' @return data.frame containing fraction of each feature in x that is
#' covered by each feature in y.
#'
#' @details
#'
#' Two versions of weights are available:
#'
#' `normalize = FALSE`, if a polygon from x is entirely within a polygon in y,
#' w will be 1. If a polygon from x is 50% in one polygon from y and 50% in another, there
#' will be two rows, one for each x/y pair of features with w = 0.5 in each. Weights
#' will sum to 1 per SOURCE polygon if the target polygons fully cover that feature.
#' `normalize = TRUE`, weights are divided by the target polygon area such that weights
#' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
#'
#' @examples
#' b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1),
#' c(1,1), c(-1,1),
#' c(-1,-1))))
#' b2 = b1 + 2
#' b3 = b1 + c(-0.2, 2)
#' b4 = b1 + c(2.2, 0)
#' b = sf::st_sfc(b1, b2, b3, b4)
#' a1 = b1 * 0.8
#' a2 = a1 + c(1, 2)
#' a3 = a1 + c(-1, 2)
#' a = sf::st_sfc(a1,a2,a3)
#' plot(b, border = 'red')
#' plot(a, border = 'green', add = TRUE)
#'
#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
#'
#' b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
#' a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3)))
#'
#' sf::st_agr(a) <- sf::st_agr(b) <- "constant"
#'
#' calculate_area_intersection_weights(a, b, normalize = FALSE)
#' calculate_area_intersection_weights(a, b, normalize = TRUE)
#' calculate_area_intersection_weights(b, a, normalize = FALSE)
#' calculate_area_intersection_weights(b, a, normalize = TRUE)
#'
#' #a more typical arrangement of polygons
#'
#' b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1),
#' c(1,1), c(-1,1),
#' c(-1,-1))))
#' b2 = b1 + 2
#' b3 = b1 + c(0, 2)
#' b4 = b1 + c(2, 0)
#' b = sf::st_sfc(b1, b2, b3, b4)
#' a1 = b1 * 0.75 + c(-.25, -.25)
#' a2 = a1 + 1.5
#' a3 = a1 + c(0, 1.5)
#' a4 = a1 + c(1.5, 0)
#' a = sf::st_sfc(a1,a2,a3, a4)
#' plot(b, border = 'red', lwd = 3)
#' plot(a, border = 'green', add = TRUE)
#'
#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
#'
#' b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
#' a <- sf::st_sf(a, data.frame(ida = c("a", "b", "c", "d")))
#'
#' sf::st_agr(a) <- sf::st_agr(b) <- "constant"
#'
#' # say we have data from `a` that we want sampled to `b`.
#' # this gives the percent of each `a` that intersects each `b`
#' `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y
#' (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target)
#' and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5
#' in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that
#' feature.
#'
#' (a_b <- calculate_area_intersection_weights(a, b, normalize = FALSE))
#' For `normalize = FALSE`, the area weighted mean calculation must include the area of each
#' x (source) polygon, as in:
#'
#' # note that `w` sums to 1 where `b` completely covers `a`.
#' > *in this case, `area` is the area of source polygons and you would do this operation grouped
#' by target polygon id.*
#'
#' > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)`
#'
#' dplyr::summarize(dplyr::group_by(a_b, ida), w = sum(w))
#' If `normalize = TRUE`, weights are divided by the target polygon area such that weights
#' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
#'
#' For `normalize = TRUE`, no area is required in the area weighted mean calculation,
#' as in:
#'
#' # We can apply these weights like...
#' dplyr::tibble(ida = unique(a_b$ida),
#' val = c(1, 2, 3, 4)) |>
#' dplyr::left_join(a_b, by = "ida") |>
#' dplyr::mutate(val = ifelse(is.na(w), NA, val),
#' areasqkm = 1.5 ^ 2) |> # area of each polygon in `a`
#' dplyr::group_by(idb) |> # group so we get one row per `b`
#' # now we weight by the percent of the area from each polygon in `b` per polygon in `a`
#' dplyr::summarize(new_val = sum( (val * w * areasqkm), na.rm = TRUE ) / sum(w * areasqkm))
#'
#' # we can go in reverse if we had data from b that we want sampled to a
#' > `sum( (val * w), na.rm = TRUE ) / sum(w)`
#'
#' See examples for illustration of these two modes.
#'
#' @examples
#'
#' library(sf)
#'
#' (b_a <- calculate_area_intersection_weights(b, a, normalize = FALSE))
#' source <- st_sf(source_id = c(1, 2),
#' val = c(10, 20),
#' geom = st_as_sfc(c(
#' "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))",
#' "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))")))
#'
#' # note that `w` sums to 1 only where `a` completely covers `b`
#' source$area <- as.numeric(st_area(source))
#'
#' dplyr::summarize(dplyr::group_by(b_a, idb), w = sum(w))
#' target <- st_sf(target_id = "a",
#' geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))"))
#'
#' # with `normalize = TRUE`, `w` will sum to 1 when the target
#' # completely covers the source rather than when the source completely
#' # covers the target.
#' plot(source['val'], reset = FALSE)
#' plot(st_geometry(target), add = TRUE)
#'
#' (a_b <- calculate_area_intersection_weights(a, b, normalize = TRUE))
#' (w <-
#' calculate_area_intersection_weights(source[c("source_id", "geom")],
#' target[c("target_id", "geom")],
#' normalize = FALSE, allow_lonlat = TRUE))
#'
#' dplyr::summarize(dplyr::group_by(a_b, idb), w = sum(w))
#' (res <-
#' merge(st_drop_geometry(source), w, by = "source_id"))
#'
#' (b_a <- calculate_area_intersection_weights(b, a, normalize = TRUE))
#' sum(res$val * res$w * res$area) / sum(res$w * res$area)
#'
#' dplyr::summarize(dplyr::group_by(b_a, ida), w = sum(w))
#' (w <-
#' calculate_area_intersection_weights(source[c("source_id", "geom")],
#' target[c("target_id", "geom")],
#' normalize = TRUE, allow_lonlat = TRUE))
#' (res <-
#' merge(st_drop_geometry(source), w, by = "source_id"))
#'
#' # We can apply these weights like...
#' # Note that we don't need area in the normalized case
#' dplyr::tibble(ida = unique(a_b$ida),
#' val = c(1, 2, 3, 4)) |>
#' dplyr::left_join(a_b, by = "ida") |>
#' dplyr::mutate(val = ifelse(is.na(w), NA, val)) |>
#' dplyr::group_by(idb) |> # group so we get one row per `b`
#' # now we weight by the percent of the area from each polygon in `b` per polygon in `a`
#' dplyr::summarize(new_val = sum( (val * w), na.rm = TRUE ))

#' sum(res$val * res$w) / sum(res$w)
#'
#' @export
#' @importFrom sf st_intersection st_set_geometry st_area st_crs st_drop_geometry
#' @importFrom dplyr mutate group_by right_join select ungroup left_join mutate

calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = FALSE) {

if(missing(normalize)) {
warning("Required input normalize is missing, defaulting to FALSE.")
normalize <- FALSE
@@ -172,27 +138,26 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat =
int <- areal::aw_intersect(y,
source = x,
areaVar = "area_intersection")
int <- areal::aw_total(int, source = x,
id = "varx", # the unique id in the "source" x
areaVar = "area_intersection",
totalVar = "totalArea_x",
type = "extensive",
weight = "total")
int <- areal::aw_weight(int, areaVar = "area_intersection",
totalVar = "totalArea_x",
areaWeight = "areaWeight_x_y")

int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx")

if(!normalize) {

if(normalize) {
int <- areal::aw_total(int,
source = x,
id = "varx", # the unique id in the "source" x
areaVar = "area_intersection",
totalVar = "totalArea_x",
type = "extensive",
weight = "total")

# for normalized, we return the percent of each target covered by each source
int <- areal::aw_intersect(y,
source = x,
areaVar = "area_intersection")
int <- areal::aw_weight(int, areaVar = "area_intersection",
totalVar = "totalArea_x",
areaWeight = "areaWeight_x_y")

# for normalized, we sum the intersection area by the total target intersection area
int <- ungroup(mutate(group_by(int, vary), totalArea_y = sum(area_intersection)))
} else {

# for normalized, we sum the intersection area by the total target area
int <- left_join(int, data.frame(vary = y$vary,
totalArea_y = as.numeric(sf::st_area(y))), by = "vary")

int <- areal::aw_weight(int,
areaVar = "area_intersection",
@@ -203,8 +168,8 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat =

int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx")

int <- select(int, varx, vary, w = areaWeight_x_y)

int <- select(int, varx, vary, w = "areaWeight_x_y")
names(int) <- c(id_x, id_y, "w")

return(dplyr::as_tibble(int))
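
To make the two weighting modes concrete, the following is a minimal sketch that mirrors the roxygen example above: it builds the same source and target polygons, computes both un-normalized and normalized weights, and applies the two documented area-weighted mean formulas with a grouped dplyr summary. The pipeline and the `mean_val` name are illustrative, not part of the package API.

library(sf)
library(dplyr)

# same toy source and target polygons as the roxygen example
source <- st_sf(source_id = c(1, 2), val = c(10, 20),
                geom = st_as_sfc(c(
                  "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))",
                  "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))")))
source$area <- as.numeric(st_area(source))

target <- st_sf(target_id = "a",
                geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))"))

# un-normalized weights: source polygon areas must enter the weighted mean
w_raw <- ncdfgeom::calculate_area_intersection_weights(
  source[c("source_id", "geom")], target[c("target_id", "geom")],
  normalize = FALSE, allow_lonlat = TRUE)

st_drop_geometry(source) |>
  left_join(w_raw, by = "source_id") |>
  group_by(target_id) |>
  summarize(mean_val = sum(val * w * area, na.rm = TRUE) / sum(w * area))

# normalized weights: source areas are already folded into w, so only w is needed
w_norm <- ncdfgeom::calculate_area_intersection_weights(
  source[c("source_id", "geom")], target[c("target_id", "geom")],
  normalize = TRUE, allow_lonlat = TRUE)

st_drop_geometry(source) |>
  left_join(w_norm, by = "source_id") |>
  group_by(target_id) |>
  summarize(mean_val = sum(val * w, na.rm = TRUE) / sum(w))
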
6 changes: 3 additions & 3 deletions docs/404.html
