Skip to content

Unexpected summarise() interaction with sf objects #2207

Open
@cphaarmeyer

Description

@cphaarmeyer

Since {dplyr} 1.1.0 there is the (experimental) .by argument in some verbs as an alternative to group_by(). I tripped over an unexpected interaction between summarise() and sf objects.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE

nc <- read_sf(system.file("shape/nc.shp", package = "sf"))
nc$area_cl <- cut(nc$AREA, c(0, .1, .12, .15, .25))

nc |>
  group_by(area_cl) |>
  summarise(mean(AREA))
#> Simple feature collection with 4 features and 2 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> Geodetic CRS:  NAD27
#> # A tibble: 4 × 3
#>   area_cl     `mean(AREA)`                                              geometry
#>   <fct>              <dbl>                                    <MULTIPOLYGON [°]>
#> 1 (0,0.1]           0.0760 (((-76.68874 36.29452, -76.72651 36.15682, -76.69016…
#> 2 (0.1,0.12]        0.112  (((-80.93127 35.61959, -81.00358 35.69706, -81.05478…
#> 3 (0.12,0.15]       0.134  (((-76.46926 34.69328, -76.2877 34.87701, -76.37468 …
#> 4 (0.15,0.25]       0.190  (((-80.76612 35.68204, -80.70597 35.85166, -80.68958…

# unexpected: with .by, the geometries are all the same
nc |>
  summarise(mean(AREA), .by = area_cl)
#> Simple feature collection with 4 features and 2 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> Geodetic CRS:  NAD27
#> # A tibble: 4 × 3
#>   area_cl     `mean(AREA)`                                              geometry
#>   <fct>              <dbl>                                    <MULTIPOLYGON [°]>
#> 1 (0.1,0.12]        0.112  (((-75.97629 36.51793, -75.97728 36.47802, -75.9248 …
#> 2 (0,0.1]           0.0760 (((-75.97629 36.51793, -75.97728 36.47802, -75.9248 …
#> 3 (0.12,0.15]       0.134  (((-75.97629 36.51793, -75.97728 36.47802, -75.9248 …
#> 4 (0.15,0.25]       0.190  (((-75.97629 36.51793, -75.97728 36.47802, -75.9248 …

# this is the expected result
nc |>
  summarise(mean(AREA), across(geometry, st_union), .by = area_cl)
#> Simple feature collection with 4 features and 2 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> Geodetic CRS:  NAD27
#> # A tibble: 4 × 3
#>   area_cl     `mean(AREA)`                                              geometry
#>   <fct>              <dbl>                                    <MULTIPOLYGON [°]>
#> 1 (0.1,0.12]        0.112  (((-80.93127 35.61959, -81.00358 35.69706, -81.05478…
#> 2 (0,0.1]           0.0760 (((-76.68874 36.29452, -76.72651 36.15682, -76.69016…
#> 3 (0.12,0.15]       0.134  (((-76.46926 34.69328, -76.2877 34.87701, -76.37468 …
#> 4 (0.15,0.25]       0.190  (((-80.76612 35.68204, -80.70597 35.85166, -80.68958…

sf_extSoftVersion()
#>           GEOS           GDAL         proj.4 GDAL_with_GEOS     USE_PROJ_H 
#>        "3.9.3"        "3.5.2"        "8.2.1"         "true"         "true" 
#>           PROJ 
#>        "8.2.1"

Created on 2023-08-02 with reprex v2.0.2

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.3 (2023-03-15 ucrt)
#>  os       Windows 10 x64 (build 19045)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  German_Germany.utf8
#>  ctype    German_Germany.utf8
#>  tz       Europe/Berlin
#>  date     2023-08-02
#>  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  class         7.3-22  2023-05-03 [2] CRAN (R 4.2.3)
#>  classInt      0.4-9   2023-02-28 [1] CRAN (R 4.2.2)
#>  cli           3.6.1   2023-03-23 [1] CRAN (R 4.2.3)
#>  DBI           1.1.3   2022-06-18 [1] CRAN (R 4.2.0)
#>  digest        0.6.33  2023-07-07 [1] CRAN (R 4.2.3)
#>  dplyr       * 1.1.2   2023-04-20 [1] CRAN (R 4.2.3)
#>  e1071         1.7-13  2023-02-01 [1] CRAN (R 4.2.2)
#>  evaluate      0.21    2023-05-05 [1] CRAN (R 4.2.3)
#>  fansi         1.0.4   2023-01-22 [1] CRAN (R 4.2.2)
#>  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.2.2)
#>  fs            1.6.3   2023-07-20 [1] CRAN (R 4.2.3)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.2.1)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.2.0)
#>  htmltools     0.5.5   2023-03-23 [1] CRAN (R 4.2.3)
#>  KernSmooth    2.23-22 2023-07-10 [2] CRAN (R 4.2.3)
#>  knitr         1.43    2023-05-25 [1] CRAN (R 4.2.3)
#>  lifecycle     1.0.3   2022-10-07 [1] CRAN (R 4.2.1)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.2.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.2.3)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.2.0)
#>  proxy         0.4-27  2022-06-09 [1] CRAN (R 4.2.0)
#>  purrr         1.0.1   2023-01-10 [1] CRAN (R 4.2.2)
#>  R.cache       0.16.0  2022-07-21 [1] CRAN (R 4.2.1)
#>  R.methodsS3   1.8.2   2022-06-13 [1] CRAN (R 4.2.0)
#>  R.oo          1.25.0  2022-06-12 [1] CRAN (R 4.2.0)
#>  R.utils       2.12.2  2022-11-11 [1] CRAN (R 4.2.2)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.2.0)
#>  Rcpp          1.0.11  2023-07-06 [1] CRAN (R 4.2.3)
#>  reprex        2.0.2   2022-08-17 [1] CRAN (R 4.2.1)
#>  rlang         1.1.1   2023-04-28 [1] CRAN (R 4.2.3)
#>  rmarkdown     2.23    2023-07-01 [1] CRAN (R 4.2.3)
#>  rstudioapi    0.15.0  2023-07-07 [1] CRAN (R 4.2.3)
#>  s2            1.1.4   2023-05-17 [1] CRAN (R 4.2.3)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.2.0)
#>  sf          * 1.0-14  2023-07-11 [1] CRAN (R 4.2.3)
#>  styler        1.10.1  2023-06-05 [1] CRAN (R 4.2.3)
#>  tibble        3.2.1   2023-03-20 [1] CRAN (R 4.2.3)
#>  tidyselect    1.2.0   2022-10-10 [1] CRAN (R 4.2.1)
#>  units         0.8-2   2023-04-27 [1] CRAN (R 4.2.3)
#>  utf8          1.2.3   2023-01-31 [1] CRAN (R 4.2.2)
#>  vctrs         0.6.3   2023-06-14 [1] CRAN (R 4.2.3)
#>  withr         2.5.0   2022-03-03 [1] CRAN (R 4.2.0)
#>  wk            0.7.3   2023-05-06 [1] CRAN (R 4.2.3)
#>  xfun          0.39    2023-04-20 [1] CRAN (R 4.2.3)
#>  yaml          2.3.7   2023-01-23 [1] CRAN (R 4.2.2)
#> 
#>  [1] C:/Users/philipp_h/AppData/Local/R/win-library/4.2
#>  [2] C:/Program Files/R/R-4.2.3/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions