diff --git a/02-spatial-data.Rmd b/02-spatial-data.Rmd index fcbcaa4fe..d51d2011b 100644 --- a/02-spatial-data.Rmd +++ b/02-spatial-data.Rmd @@ -821,14 +821,14 @@ library(tmap) tm1 = tm_shape(india_buffer_with_s2) + tm_fill(fill = hcl.colors(4, palette = "purple green")[2], lwd = 0.01) + tm_shape(india) + - tm_fill(fill = "grey95") + + tm_fill(fill = "gray95") + tm_title("st_buffer() with dist = 1") + tm_title("s2 switched on (default)", position = tm_pos_in("right", "bottom"), size = 1) tm2 = tm_shape(india_buffer_without_s2) + tm_fill(fill = hcl.colors(4, palette = "purple green")[3], lwd = 0.01) + tm_shape(india) + - tm_fill(fill = "grey95") + + tm_fill(fill = "gray95") + tm_title(" ") + tm_title("s2 switched off", position = tm_pos_in("right", "bottom"), size = 1) diff --git a/04-spatial-operations.Rmd b/04-spatial-operations.Rmd index 269a64002..987f8fe0c 100644 --- a/04-spatial-operations.Rmd +++ b/04-spatial-operations.Rmd @@ -552,7 +552,7 @@ tm_shape(grid_sf) + tm_fill(fill = c("queens", "rooks"), fill.scale = tm_scale(values = c("white", "black"))) + tm_shape(grid_sf) + - tm_borders(col = "grey", lwd = 2) + + tm_borders(col = "gray", lwd = 2) + tm_layout(frame = FALSE, legend.show = FALSE, panel.labels = c("queen", "rook")) ``` diff --git a/09-mapping.Rmd b/09-mapping.Rmd index 7ce8f7a0f..fe25ad526 100644 --- a/09-mapping.Rmd +++ b/09-mapping.Rmd @@ -1012,9 +1012,9 @@ Building on this basic example and knowing where to find help (see `?shiny`), th The recommended next step is to open the previously mentioned [`CycleHireApp/app.R`](https://github.com/geocompx/geocompr/blob/main/apps/CycleHireApp/app.R) script in an integrated development environment (IDE) of choice, modify it and re-run it repeatedly. The example contains some of the components of a web mapping application implemented in **shiny** and should 'shine' a light on how they behave. -The `CycleHireApp/app.R` script contains **shiny** functions that go beyond those demonstrated in the simple 'lifeApp' example (Figure \@ref(fig:CycleHireApp-html)). -These include `reactive()` and `observe()` (for creating outputs that respond to the user interface --- see `?reactive`) and `leafletProxy()` (for modifying a `leaflet` object that has already been created). -Such elements are critical to the creation of web mapping applications implemented in **shiny**. +The `CycleHireApp/app.R` script contains **shiny** functions that go beyond those demonstrated in the simple 'lifeApp' example, deployed at [shiny.robinlovelace.net/CycleHireApp](https://shiny.robinlovelace.net/CycleHireApp). +These include `reactive()` and `observe()`, (for creating outputs that respond to the user interface, see `?reactive`) and `leafletProxy()` (for modifying a `leaflet` object that has already been created). +Such elements enable web mapping applications implemented in **shiny** [@lovelace_propensity_2017]. A range of 'events' can be programmed including advanced functionality such as drawing new layers or subsetting data, as described in the shiny section of RStudio's **leaflet** [website](https://rstudio.github.io/leaflet/shiny.html). ```{block2 shinynote, type='rmdnote'} @@ -1040,11 +1040,9 @@ For that reason, despite advocating **shiny**, we recommend starting with the lo This way your prototype web applications should be limited not by technical considerations, but by your motivations and imagination. 
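To make the `reactive()`/`observe()`/`leafletProxy()` pattern mentioned above more concrete, here is a minimal sketch of how these pieces typically fit together. It is not taken from `CycleHireApp/app.R`; the slider, the random points and the map view are invented purely for illustration.

```{r, eval=FALSE}
library(shiny)
library(leaflet)

ui = fluidPage(
  sliderInput("n", "Number of points", min = 1, max = 100, value = 10),
  leafletOutput("map")
)

server = function(input, output, session) {
  # the base map is rendered once...
  output$map = renderLeaflet({
    leaflet() |> addTiles() |> setView(lng = -1.5, lat = 53.4, zoom = 6)
  })
  # ...and modified in place via leafletProxy() whenever input$n changes,
  # so the whole map does not need to be redrawn
  observe({
    pts = data.frame(lng = runif(input$n, -2, -1), lat = runif(input$n, 53, 54))
    leafletProxy("map") |>
      clearMarkers() |>
      addCircleMarkers(data = pts)
  })
}

shinyApp(ui, server)
```

`CycleHireApp/app.R` applies the same division of labor at a larger scale: the map is drawn once and subsequently updated in response to user input rather than being recreated on every change.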
```{r CycleHireApp-html, echo=FALSE, message=FALSE, fig.cap="CycleHireApp, a simple web mapping application for finding the closest cycle hiring station based on your location and requirement of cycles. Interactive version available online at: r.geocompx.org.",fig.scap="Cycle Hire App, a simple web mapping application.", eval=knitr::is_html_output(), out.width="690"} -knitr::include_app("https://shiny.robinlovelace.net/CycleHireApp/") -``` - -```{r CycleHireApp-latex, echo=FALSE, message=FALSE, fig.cap="CycleHireApp, a simple web mapping application for finding the closest cycle hiring station based on your location and requirement of cycles. Interactive version available online at r.geocompx.org.", fig.scap="coffeeApp, a simple web mapping application.", eval=knitr::is_latex_output()} -knitr::include_graphics("images/09_cycle_hire_app.png") +if (knitr::is_html_output()){ + knitr::include_url("https://shiny.robinlovelace.net/CycleHireApp/") +} ``` ## Other mapping packages diff --git a/10-gis.Rmd b/10-gis.Rmd index 0734e8b91..053ef51d6 100644 --- a/10-gis.Rmd +++ b/10-gis.Rmd @@ -212,11 +212,11 @@ aggzone_wgs = st_transform(aggregating_zones, "EPSG:4326") #| results: hide library(tmap) tm_shape(incongr_wgs) + - tm_polygons(col = "grey5") + + tm_polygons(col = "gray5") + tm_shape(aggzone_wgs) + tm_borders(col_alpha = 0.5, col = "red") + tm_add_legend(type = "lines", labels = c("incongr_wgs", "aggzone_wgs"), - col = c("grey5", "red"), lwd = 3, position = tm_pos_in("right", "top")) + + col = c("gray5", "red"), lwd = 3, position = tm_pos_in("right", "top")) + tm_scalebar(position = c("left", "bottom"), breaks = c(0, 0.5, 1)) + tm_layout(frame = FALSE, legend.text.size = 1, inner.margins = c(0.02, 0.02, 0.03, 0.02)) diff --git a/12-spatial-cv.Rmd b/12-spatial-cv.Rmd index 755da8bdf..6363189c5 100644 --- a/12-spatial-cv.Rmd +++ b/12-spatial-cv.Rmd @@ -26,7 +26,7 @@ library(lgr) # logging framework for R library(mlr3) # unified interface to machine learning algorithms library(mlr3learners) # most important machine learning algorithms library(mlr3extralearners) # access to even more learning algorithms -library(mlr3proba) # make probabilistic predictions, here only needed for mlr3extralearners::list_learners() +library(mlr3proba) # here needed for mlr3extralearners::list_learners() library(mlr3spatiotempcv) # spatiotemporal resampling strategies library(mlr3tuning) # hyperparameter tuning library(mlr3viz) # plotting functions for mlr3 objects @@ -214,7 +214,7 @@ However, it is possible to include spatial autocorrelation\index{autocorrelation Though, this is beyond the scope of this book, we give the interested reader some pointers where to look it up: 1. The predictions of regression kriging combines the predictions of a regression with the kriging of the regression's residuals [@goovaerts_geostatistics_1997; @hengl_practical_2007; @bivand_applied_2013]. -2. One can also add a spatial correlation (dependency) structure to a generalized least squares model (`nlme::gls()`; @zuur_mixed_2009; @zuur_beginners_2017). +2. One can also add a spatial correlation (dependency) structure to a generalized least squares model [`nlme::gls()`, @zuur_mixed_2009; @zuur_beginners_2017]. 3. One can also use mixed-effect modeling approaches. Basically, a random effect imposes a dependency structure on the response variable which in turn allows for observations of one class to be more similar to each other than to those of another class [@zuur_mixed_2009]. 
Classes can be, for example, bee hives, owl nests, vegetation transects or an altitudinal stratification. @@ -444,7 +444,7 @@ mean(score_spcv_glm$classif.auc) |> round(2) ``` -To put these results in perspective, let us compare them with AUROC\index{AUROC} values from a 100-repeated 5-fold non-spatial cross-validation (Figure \@ref(fig:boxplot-cv); the code for the non-spatial cross-validation\index{cross-validation} is not shown here but will be explored in the exercise section). +To put these results in perspective, let us compare them with AUROC\index{AUROC} values from a 100-repeated 5-fold non-spatial cross-validation (Figure \@ref(fig:boxplot-cv); the code for the non-spatial cross-validation\index{cross-validation} is not shown here but will be explored in the Exercise section). As expected (see Section \@ref(intro-cv)), the spatially cross-validated result yields lower AUROC values on average than the conventional cross-validation approach, underlining the over-optimistic predictive performance of the latter due to its spatial autocorrelation\index{autocorrelation!spatial}. ```{r boxplot-cv, echo=FALSE, message=FALSE, out.width="75%", fig.cap="Boxplot showing the difference in GLM AUROC values on spatial and conventional 100-repeated 5-fold cross-validation.", fig.scap="Boxplot showing AUROC values."} @@ -625,7 +625,7 @@ score_spcv_svm = dplyr::filter(score, learner_id == "classif.ksvm.tuned", resampling_id == "repeated_spcv_coords") ``` -Let us have a look at the final AUROC\index{AUROC}: the model's ability to discriminate the two classes. +Let's have a look at the final AUROC\index{AUROC}: the model's ability to discriminate the two classes. ```{r 12-spatial-cv-33} # final mean AUROC diff --git a/14-location.Rmd b/14-location.Rmd index c0e181f05..cce3d14fb 100644 --- a/14-location.Rmd +++ b/14-location.Rmd @@ -27,7 +27,7 @@ This chapter demonstrates how the skills learned in Parts I and II can be applie This is a broad field of research and commercial application. A typical example of geomarketing is where to locate a new shop. The aim here is to attract most visitors and, ultimately, make the most profit. -There are also many non-commercial applications that can use the technique for public benefit, for example where to locate new health services [@tomintz_geography_2008]. +There are also many non-commercial applications that can use the technique for public benefit, for example, where to locate new health services [@tomintz_geography_2008]. People are fundamental to location analysis\index{location analysis}, in particular where they are likely to spend their time and other resources. Interestingly, ecological concepts and models are quite similar to those used for store location analysis. @@ -44,7 +44,7 @@ Typical research questions include: - Do existing services over- or under-utilize the market potential? - What is the market share of a company in a specific area? -This chapter demonstrates how geocomputation can answer such questions based on a hypothetical case study based on real data. +This chapter demonstrates how geocomputation can answer such questions based on a hypothetical case study and real data. 
 ## Case study: bike shops in Germany {#case-study}
 
@@ -61,7 +61,7 @@ The following sections will demonstrate how the techniques learned during the fi
 - Convert the tabulated census data into raster\index{raster} objects (Section \@ref(create-census-rasters))
 - Identify metropolitan areas with high population densities (Section \@ref(define-metropolitan-areas))
 - Download detailed geographic data (from OpenStreetMap\index{OpenStreetMap}, with **osmdata**\index{osmdata (package)}) for these areas (Section \@ref(points-of-interest))
-- Create rasters\index{raster} for scoring the relative desirability of different locations using map algebra\index{map algebra} (Section \@ref(identifying-suitable-locations))
+- Create rasters\index{raster} for scoring the relative desirability of different locations using map algebra\index{map algebra} (Section \@ref(identify-suitable-locations))
 
 Although we have applied these steps to a specific case study, they could be generalized to many scenarios of store location or public service provision.
 
@@ -86,7 +86,7 @@ data("census_de", package = "spDataLarge")
 The `census_de` object is a data frame containing 13 variables for more than 360,000 grid cells across Germany.
 For our work, we only need a subset of these: Easting (`x`) and Northing (`y`), number of inhabitants (population; `pop`), mean average age (`mean_age`), proportion of women (`women`) and average household size (`hh_size`).
 These variables are selected and renamed from German into English in the code chunk below and summarized in Table \@ref(tab:census-desc).
-Further, `mutate()` is used to convert values -1 and -9 (meaning "unknown") to `NA`.
+Further, `mutate()` is used to convert values `-1` and `-9` (meaning "unknown") to `NA`.
 
 ```{r 14-location-4}
 # pop = population, hh_size = household size
@@ -131,11 +131,10 @@ tab = dplyr::tribble(
 # summary(input_factor)
 cap = paste("Categories for each variable in census data from",
             "Datensatzbeschreibung...xlsx",
-            "located in the downloaded file census.zip (see Figure",
-            "\\@ref(fig:census-stack) for their spatial distribution).")
+            "located in the downloaded file census.zip.")
 knitr::kable(tab,
-             col.names = c("Class", "Population", "% female", "Mean age",
-                           "Household size"),
+             col.names = c("Class", "Population", "% Female", "Mean Age",
+                           "Household Size"),
              caption = cap, caption.short = "Categories for each variable in census data.",
             align = "c", booktabs = TRUE)
@@ -156,12 +155,12 @@ input_ras
 ```
 
 ```{block2 14-location-7, type='rmdnote'}
-Note that we are using an equal-area projection (EPSG:3035; Lambert Equal Area Europe), i.e., a projected CRS\index{CRS!projected} where each grid cell has the same area, here 1000 x 1000 square meters.
+Note that we are using an equal-area projection (EPSG:3035; Lambert Equal Area Europe), i.e., a projected CRS\index{CRS!projected} where each grid cell has the same area, here 1000 * 1000 square meters.
 Since we are using mainly densities such as the number of inhabitants or the portion of women per grid cell, it is of utmost importance that the area of each grid cell is the same to avoid 'comparing apples and oranges'.
-Be careful with geographic CRS\index{CRS!geographic} where grid cell areas constantly decrease in poleward directions (see also Section \@ref(crs-intro) and Chapter \@ref(reproj-geo-data)).
+Be careful with geographic CRS\index{CRS!geographic} where grid cell areas constantly decrease in poleward directions (see also Section \@ref(crs-intro) and Chapter \@ref(reproj-geo-data)).
``` -```{r census-stack, echo=FALSE, fig.cap="Gridded German census data of 2011 (see Table \\@ref(tab:census-desc) for a description of the classes).", fig.scap="Gridded German census data."} +```{r census-stack, echo=FALSE, fig.cap="Gridded German census data of 2011 (see Table 14.1 for a description of the classes).", fig.scap="Gridded German census data."} knitr::include_graphics("images/14_census_stack.png") ``` @@ -172,9 +171,9 @@ A cell value of 8000 inhabitants was chosen for 'class 6' because these cells co Of course, these are approximations of the true population, not precise values.^[ The potential error introduced during this reclassification stage will be explored in the exercises. ] -However, the level of detail is sufficient to delineate metropolitan areas (see next section). +However, the level of detail is sufficient to delineate metropolitan areas (see Section \@ref(define-metropolitan-areas)). -In contrast to the `pop` variable, representing absolute estimates of the total population, the remaining variables were re-classified as weights corresponding with weights used in the survey. +In contrast to the `pop` variable, representing absolute estimates of the total population, the remaining variables were reclassified as weights corresponding with weights used in the survey. Class 1 in the variable `women`, for instance, represents areas in which 0 to 40% of the population is female; these are reclassified with a comparatively high weight of 3 because the target demographic is predominantly male. Similarly, the classes containing the youngest people and highest proportion of single households are reclassified to have high weights. @@ -216,7 +215,7 @@ reclass # full output not shown We deliberately define metropolitan areas as pixels of 20 km^2^ inhabited by more than 500,000 people. Pixels at this coarse resolution can rapidly be created using `aggregate()`\index{aggregation}, as introduced in Section \@ref(aggregation-and-disaggregation). -The command below uses the argument `fact = 20` to reduce the resolution of the result twenty-fold (recall the original raster resolution was 1 km^2^). +The command below uses the argument `fact = 20` to reduce the resolution of the result 20-fold (recall the original raster resolution was 1 km^2^). ```{r 14-location-11, warning=FALSE, cache=TRUE, cache.lazy=FALSE} pop_agg = aggregate(reclass$pop, fact = 20, fun = sum, na.rm = TRUE) @@ -266,21 +265,21 @@ To make sure that the reader uses the exact same results, we have put them into ```{r metro-names, echo=FALSE} data("metro_names", package = "spDataLarge") -knitr::kable(select(metro_names, city, state), +knitr::kable(select(metro_names, City = city, State = state), caption = "Result of the reverse geocoding.", caption.short = "Result of the reverse geocoding.", booktabs = TRUE) ``` -Overall, we are satisfied with the `city` column serving as metropolitan names (Table \@ref(tab:metro-names)) apart from one exception, namely Velbert which belongs to the greater region of Düsseldorf. +Overall, we are satisfied with the `City` column serving as metropolitan names (Table \@ref(tab:metro-names)) apart from one exception, namely Velbert which belongs to the greater region of Düsseldorf. Hence, we replace Velbert with Düsseldorf (Figure \@ref(fig:metro-areas)). Umlauts like `ü` might lead to trouble further on, for example when determining the bounding box of a metropolitan area with `opq()` (see further below), which is why we avoid them. 
```{r 14-location-19} metro_names = metro_names$city |> as.character() |> - {\(x) ifelse(x == "Velbert", "Düsseldorf", x)}() |> - {\(x) gsub("ü", "ue", x)}() + (\(x) ifelse(x == "Velbert", "Düsseldorf", x))() |> + gsub("ü", "ue", x = _) ``` ## Points of interest @@ -296,9 +295,9 @@ The subsequent code chunk does this using a number of functions including: - `while()`\index{loop!while}, which tries two more times to download the data if the download failed the first time^[The OSM-download will sometimes fail at the first attempt. ] -Before running this code: please consider it will download almost 2GB of data. +Before running this code, please consider it will download almost two GB of data. To save time and resources, we have put the output named `shops` into **spDataLarge**. -To make it available in your environment run `data("shops", package = "spDataLarge")`. +To make it available in your environment, run `data("shops", package = "spDataLarge")`. ```{r 14-location-20, eval=FALSE, message=FALSE} shops = purrr::map(metro_names, function(x) { @@ -332,7 +331,7 @@ if (any(ind)) { } ``` -To make sure that each list element (an `sf`\index{sf} data frame) comes with the same columns^[This is not a given since OSM contributors are not equally meticulous when collecting data.] we only keep the `osm_id` and the `shop` columns with the help of the `map_dfr` loop which additionally combines all shops into one large `sf`\index{sf} object. +To make sure that each list element (an `sf`\index{sf} data frame) comes with the same columns^[This is not a given since OSM contributors are not equally meticulous when collecting data.], we only keep the `osm_id` and the `shop` columns with the help of the `map_dfr` loop which additionally combines all shops into one large `sf`\index{sf} object. ```{r 14-location-22, eval=FALSE} # select only specific columns @@ -381,11 +380,10 @@ poi = classify(poi, rcl = rcl_poi, right = NA) names(poi) = "poi" ``` -## Identifying suitable locations +## Identify suitable locations The only steps that remain before combining all the layers are to add `poi` to the `reclass` raster stack and remove the population layer from it. -The reasoning for the latter is twofold. -First of all, we have already delineated metropolitan areas, that is areas where the population density is above average compared to the rest of Germany. +The reasoning for the latter is: First of all, we have already delineated metropolitan areas, that is areas where the population density is above average compared to the rest of Germany. Second, though it is advantageous to have many potential customers within a specific catchment area\index{catchment area}, the sheer number alone might not actually represent the desired target group. For instance, residential tower blocks are areas with a high population density but not necessarily with a high purchasing power for expensive cycle components. @@ -430,14 +428,14 @@ if (knitr::is_latex_output()) { The presented approach is a typical example of the normative usage of a GIS\index{GIS} [@longley_geographic_2015]. We combined survey data with expert-based knowledge and assumptions (definition of metropolitan areas, defining class intervals, definition of a final score threshold). -This approach is less suitable for scientific research than applied analysis that provides an evidence based indication of areas suitable for bike shops that should be compared with other sources of information. 
+This approach is less suitable for scientific research than applied analysis that provides an evidence-based indication of areas suitable for bike shops that should be compared with other sources of information. A number of changes to the approach could improve the analysis: - We used equal weights when calculating the final scores but other factors, such as the household size, could be as important as the portion of women or the mean age - We used all points of interest\index{point of interest} but only those related to bike shops, such as do-it-yourself, hardware, bicycle, fishing, hunting, motorcycles, outdoor and sports shops (see the range of shop values available on the [OSM Wiki](https://wiki.openstreetmap.org/wiki/Map_Features#Shop)) may have yielded more refined results -- Data at a higher resolution may improve the output (see exercises) +- Data at a higher resolution may improve the output (see Exercises) - We have used only a limited set of variables and data from other sources, such as the [INSPIRE geoportal](https://inspire-geoportal.ec.europa.eu/) or data on cycle paths from OpenStreetMap, may enrich the analysis (see also Section \@ref(retrieving-data)) -- Interactions remained unconsidered, such as a possible relationships between the portion of men and single households +- Interactions remained unconsidered, such as a possible relationship between the portion of men and single households In short, the analysis could be extended in multiple directions. Nevertheless, it should have given you a first impression and understanding of how to obtain and deal with spatial data in R\index{R} within a geomarketing\index{geomarketing} context. diff --git a/15-eco.Rmd b/15-eco.Rmd index 46f339b23..83ab98d1e 100644 --- a/15-eco.Rmd +++ b/15-eco.Rmd @@ -7,7 +7,7 @@ source("code/before_script.R") ## Prerequisites {-} This chapter assumes you have a strong grasp of geographic data analysis and processing, covered in Chapters \@ref(spatial-class) to \@ref(geometry-operations). -The chapter makes use of bridges to GIS software, and spatial cross-validation, covered in Chapters \@ref(gis) and \@ref(spatial-cv) respectively. +The chapter makes use of bridges to GIS software, and spatial cross-validation, covered in Chapters \@ref(gis) and \@ref(spatial-cv), respectively. The chapter uses the following packages: @@ -42,11 +42,11 @@ Unfortunately, fog oases are heavily endangered, primarily due to agriculture an Evidence on the composition and spatial distribution of the native flora can support efforts to protect remaining fragments of fog oases [@muenchow_predictive_2013; @muenchow_soil_2013]. In this chapter you will analyze the composition and the spatial distribution of vascular plants (here referring mostly to flowering plants) on the southern slope of Mt. Mongón, a *lomas* mountain near Casma on the central northern coast of Peru (Figure \@ref(fig:study-area-mongon)). -During a field study to Mt. Mongón, all vascular plants living in 100 randomly sampled 4x4 m^2^ plots in the austral winter of 2011 were recorded [@muenchow_predictive_2013]. +During a field study to Mt. Mongón, all vascular plants living in 100 randomly sampled 4 by 4 m^2^ plots in the austral winter of 2011 were recorded [@muenchow_predictive_2013]. The sampling coincided with a strong La Niña event that year, as shown in data published by the National Oceanic and Atmospheric Administration ([NOAA](https://origin.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/ONI_v5.php)). 
This led to even higher levels of aridity than usual in the coastal desert and increased fog activity on the southern slopes of Peruvian *lomas* mountains. -```{r study-area-mongon, echo=FALSE, fig.cap="The Mt. Mongón study area, from Muenchow, Schratz, and Brenning (2017).", out.width="60%", fig.scap="The Mt. Mongón study area."} +```{r study-area-mongon, echo=FALSE, fig.cap="The Mt. Mongón study area. Figure taken from Muenchow, Schratz, and Brenning (2017).", out.width="60%", fig.scap="The Mt. Mongón study area."} knitr::include_graphics("images/15_study_area_mongon.png") # knitr::include_graphics("https://user-images.githubusercontent.com/1825120/38989956-6eae7c9a-43d0-11e8-8f25-3dd3594f7e74.png") ``` @@ -120,7 +120,7 @@ knitr::include_graphics("images/15_sa_mongon_sampling.png") The next step is to compute variables which are not only needed for the modeling and predictive mapping (see Section \@ref(predictive-mapping)) but also for aligning the non-metric multidimensional scaling (NMDS)\index{NMDS} axes with the main gradient in the study area, altitude and humidity, respectively (see Section \@ref(nmds)). Specifically, we compute catchment slope and catchment area\index{catchment area} from a digital elevation model\index{digital elevation model} using R-GIS bridges (see Chapter \@ref(gis)). -Curvatures might also represent valuable predictors, and in the exercise section you can find out how they would impact the modeling result. +Curvatures might also represent valuable predictors, and in the Exercise section you can find out how they would impact the modeling result. To compute catchment area\index{catchment area} and catchment slope, we can make use of the `sagang:sagawetnessindex` function.^[Admittedly, it is a bit unsatisfying that the only way of knowing that `sagawetnessindex` computes the desired terrain attributes is to be familiar with SAGA\index{SAGA}.] `qgis_show_help()` returns all function\index{function} parameters and default values of a specific geoalgorithm\index{geoalgorithm}. @@ -181,7 +181,7 @@ ep = qgisprocess::qgis_run_algorithm( ``` This returns a list named `ep` containing the paths to the computed output rasters. -Let's read in catchment area as well as catchment slope into a multi-layer `SpatRaster` object (see Section \@ref(raster-classes)). +Let's read-in catchment area as well as catchment slope into a multi-layer `SpatRaster` object (see Section \@ref(raster-classes)). Additionally, we will add two more raster objects to it, namely `dem` and `ndvi`. ```{r 15-eco-7, eval=FALSE} @@ -210,7 +210,6 @@ ep = rast(system.file("raster/ep.tif", package = "spDataLarge")) Finally, we can extract the terrain attributes to our field observations (see also Section \@ref(raster-extraction)). ```{r 15-eco-10, cache=TRUE, cache.lazy=FALSE, message=FALSE, warning=FALSE} -# terra::extract adds automatically a for our purposes unnecessary ID column ep_rp = terra::extract(ep, random_points, ID = FALSE) random_points = cbind(random_points, ep_rp) ``` @@ -219,21 +218,21 @@ random_points = cbind(random_points, ep_rp) Ordinations\index{ordination} are a popular tool in vegetation science to extract the main information, frequently corresponding to ecological gradients, from large species-plot matrices mostly filled with 0s. However, they are also used in remote sensing\index{remote sensing}, the soil sciences, geomarketing\index{geomarketing} and many other fields. 
-If you are unfamiliar with ordination\index{ordination} techniques or in need of a refresher, have a look at Michael W. Palmer's [web page](https://ordination.okstate.edu/overview.htm) for a short introduction to popular ordination techniques in ecology and at @borcard_numerical_2011 for a deeper look on how to apply these techniques in R. +If you are unfamiliar with ordination\index{ordination} techniques or in need of a refresher, have a look at [Michael W. Palmer's webpage](https://ordination.okstate.edu/overview.htm) for a short introduction to popular ordination techniques in ecology and at @borcard_numerical_2011 for a deeper look on how to apply these techniques in R. **vegan**'s\index{vegan (package)} package documentation is also a very helpful resource (`vignette(package = "vegan")`). Principal component analysis (PCA\index{PCA}) is probably the most famous ordination\index{ordination} technique. It is a great tool to reduce dimensionality if one can expect linear relationships between variables, and if the joint absence of a variable in two plots (observations) can be considered a similarity. This is barely the case with vegetation data. -For one, the presence of a plant often follows a unimodal, i.e. a non-linear, relationship along a gradient (e.g., humidity, temperature or salinity) with a peak at the most favorable conditions and declining ends towards the unfavorable conditions. +For one, the presence of a plant often follows a unimodal, i.e., a non-linear, relationship along a gradient (e.g., humidity, temperature or salinity) with a peak at the most favorable conditions and declining ends towards the unfavorable conditions. Secondly, the joint absence of a species in two plots is hardly an indication for similarity. -Suppose a plant species is absent from the driest (e.g., an extreme desert) and the most moistest locations (e.g., a tree savanna) of our sampling. +Suppose a plant species is absent from the driest (e.g., an extreme desert) and the moistest locations (e.g., a tree savanna) of our sampling. Then we really should refrain from counting this as a similarity because it is very likely that the only thing these two completely different environmental settings have in common in terms of floristic composition is the shared absence of species (except for rare ubiquitous species). -Non-metric multidimensional scaling (NMDS\index{NMDS}) is one popular dimension-reducing technique used in ecology [@vonwehrden_pluralism_2009]. -NMDS\index{NMDS} reduces the rank-based differences between the distances between objects in the original matrix and distances between the ordinated objects. +NMDS\index{NMDS} is one popular dimension-reducing technique used in ecology [@vonwehrden_pluralism_2009]. +NMDS\index{NMDS} reduces the rank-based differences between distances between objects in the original matrix and distances between the ordinated objects. The difference is expressed as stress. The lower the stress value, the better the ordination, i.e., the low-dimensional representation of the original matrix. Stress values lower than 10 represent an excellent fit, stress values of around 15 are still good, and values greater than 20 represent a poor fit [@mccune_analysis_2002]. @@ -362,7 +361,7 @@ plot(fit_2) ``` The scores of the first NMDS\index{NMDS} axis represent the different vegetation formations, i.e., the floristic gradient, appearing along the slope of Mt. Mongón. 
-To spatially visualize them, we can model the NMDS\index{NMDS} scores with the previously created predictors (Section \@ref(data-and-data-preparation)), and use the resulting model for predictive mapping (see next section). +To spatially visualize them, we can model the NMDS\index{NMDS} scores with the previously created predictors (Section \@ref(data-and-data-preparation)), and use the resulting model for predictive mapping (see Section \@ref(modeling-the-floristic-gradient)). ## Modeling the floristic gradient @@ -406,8 +405,8 @@ The first internal node at the top of the tree assigns all observations which ar The observations falling into the left branch have a mean NMDS\index{NMDS} score of -1.198. Overall, we can interpret the tree as follows: the higher the elevation, the higher the NMDS\index{NMDS} score becomes. This means that the simple decision tree has already revealed four distinct floristic assemblages. -For a more in-depth interpretation please refer to the \@ref(predictive-mapping) section. -Decision trees have a tendency to overfit\index{overfitting}, that is they mirror too closely the input data including its noise which in turn leads to bad predictive performances (Section \@ref(intro-cv); @james_introduction_2013). +For a more in-depth interpretation, please refer to Section \@ref(predictive-mapping). +Decision trees have a tendency to overfit\index{overfitting}, that is, they mirror too closely the input data including its noise which in turn leads to bad predictive performances [Section \@ref(intro-cv), @james_introduction_2013]. Bootstrap aggregation (bagging) is an ensemble technique that can help to overcome this problem. Ensemble techniques simply combine the predictions of multiple models. Thus, bagging takes repeated samples from the same input data and averages the predictions. @@ -428,10 +427,10 @@ The code in this section largely follows the steps we have introduced in Section The only differences are the following: 1. The response variable is numeric, hence a regression\index{regression} task will replace the classification\index{classification} task of Section \@ref(svm) -1. Instead of the AUROC\index{AUROC} which can only be used for categorical response variables, we will use the root mean squared error (RMSE\index{RMSE}) as performance measure -1. We use a random forest\index{random forest} model instead of a support vector machine\index{SVM} which naturally goes along with different hyperparameters\index{hyperparameter} +1. Instead of the AUROC\index{AUROC} which can only be used for categorical response variables, we will use the root mean squared error (RMSE\index{RMSE}) as the performance measure +1. We use a Random Forest\index{random forest} model instead of a Support Vector Machine\index{SVM} which naturally goes along with different hyperparameters\index{hyperparameter} 1. We are leaving the assessment of a bias-reduced performance measure as an exercise to the reader (see Exercises). -Instead we show how to tune hyperparameters\index{hyperparameter} for (spatial) predictions +Instead, we show how to tune hyperparameters\index{hyperparameter} for (spatial) predictions Remember that 125,500 models were necessary to retrieve bias-reduced performance estimates when using 100-repeated 5-fold spatial cross-validation\index{cross-validation!spatial CV} and a random search of 50 iterations in Section \@ref(svm). 
In the hyperparameter\index{hyperparameter} tuning level, we found the best hyperparameter combination which in turn was used in the outer performance level for predicting the test data of a specific spatial partition (see also Figure \@ref(fig:inner-outer)). @@ -440,12 +439,12 @@ Which one should we use for making spatial distribution maps? The answer is simple: none at all. Remember, the tuning was done to retrieve a bias-reduced performance estimate, not to do the best possible spatial prediction. For the latter, one estimates the best hyperparameter\index{hyperparameter} combination from the complete dataset. -This means, the inner hyperparameter\index{hyperparameter} tuning level is no longer needed which makes perfect sense since we are applying our model to new data (unvisited field observations) for which the true outcomes are unavailable, hence testing is impossible in any case. +This means, the inner hyperparameter\index{hyperparameter} tuning level is no longer needed, which makes perfect sense since we are applying our model to new data (unvisited field observations) for which the true outcomes are unavailable, hence testing is impossible in any case. Therefore, we tune the hyperparameters\index{hyperparameter} for a good spatial prediction on the complete dataset via a 5-fold spatial CV\index{cross-validation!spatial CV} with one repetition. Having already constructed the input variables (`rp`), we are all set for specifying the **mlr3**\index{mlr3 (package)} building blocks (task, learner, and resampling). -For specifying a spatial task, we use again the **mlr3spatiotempcv** package [@schratz_mlr3spatiotempcv_2021 & Section \@ref(spatial-cv-with-mlr3)], and since our response (`sc`) is numeric, we use a regression\index{regression} task. +For specifying a spatial task, we use again the **mlr3spatiotempcv** package [@schratz_mlr3spatiotempcv_2021 Section \@ref(spatial-cv-with-mlr3)], and since our response (`sc`) is numeric, we use a regression\index{regression} task. ```{r 15-eco-20} # create task @@ -457,14 +456,14 @@ task = mlr3spatiotempcv::as_task_regr_st( ``` Using an `sf` object as the backend automatically provides the geometry information needed for the spatial partitioning later on. -Additionally, we got rid of the columns `id` and `spri` since these variables should not be used as predictors in the modeling. -Next, we go on to construct the a random forest\index{random forest} learner from the **ranger** package [@wright_ranger_2017]. +Additionally, we got rid of the columns `id` and `spri`, since these variables should not be used as predictors in the modeling. +Next, we go on to construct a random forest\index{random forest} learner from the **ranger** package [@wright_ranger_2017]. ```{r 15-eco-21} lrn_rf = lrn("regr.ranger", predict_type = "response") ``` -As opposed to, for example, support vector machines\index{SVM} (see Section \@ref(svm)), random forests often already show good performances when used with the default values of their hyperparameters (which may be one reason for their popularity). +As opposed to, for example, Support Vector Machines\index{SVM} (see Section \@ref(svm)), random forests often already show good performances when used with the default values of their hyperparameters (which may be one reason for their popularity). Still, tuning often moderately improves model results, and thus is worth the effort [@probst_hyperparameters_2018]. 
 In random forests\index{random forest}, the hyperparameters\index{hyperparameter} `mtry`, `min.node.size` and `sample.fraction` determine the degree of randomness, and should be tuned [@probst_hyperparameters_2018].
 `mtry` indicates how many predictor variables should be used in each tree.
@@ -488,7 +487,7 @@ search_space = paradox::ps(
 
 Having defined the search space, we are all set for specifying our tuning via the `AutoTuner()` function.
 Since we deal with geographic data, we will again make use of spatial cross-validation to tune the hyperparameters\index{hyperparameter} (see Sections \@ref(intro-cv) and \@ref(spatial-cv-with-mlr3)).
-Specifically, we will use a five-fold spatial partitioning with only one repetition (`rsmp()`).
+Specifically, we will use a 5-fold spatial partitioning with only one repetition (`rsmp()`).
 In each of these spatial partitions, we run 50 models (`trm()`) while using randomly selected hyperparameter configurations (`tnr()`) within predefined limits (`seach_space`) to find the optimal hyperparameter\index{hyperparameter} combination [see also Section \@ref(svm) and https://mlr3book.mlr-org.com/chapters/chapter4/hyperparameter_optimization.html#sec-autotuner, @bischl_applied_2024].
 The performance measure is the root mean squared error (RMSE\index{RMSE}).
 
@@ -586,13 +585,13 @@ all(values(pred - pred_2) == 0)
 
 The predictive mapping clearly reveals distinct vegetation belts (Figure \@ref(fig:rf-pred)).
 Please refer to @muenchow_soil_2013 for a detailed description of vegetation belts on *lomas* mountains.
-The blue color tones represent the so-called *Tillandsia*-belt.
+The blue color tones represent the so-called *Tillandsia* belt.
 *Tillandsia* is a highly adapted genus especially found in high quantities at the sandy and quite desertic foot of *lomas* mountains.
-The yellow color tones refer to a herbaceous vegetation belt with a much higher plant cover compared to the *Tillandsia*-belt.
+The yellow color tones refer to a herbaceous vegetation belt with a much higher plant cover compared to the *Tillandsia* belt.
 The orange colors represent the bromeliad belt, which features the highest species richness and plant cover.
 It can be found directly beneath the temperature inversion (ca. 750-850 m asl) where humidity due to fog is highest.
 Water availability naturally decreases above the temperature inversion, and the landscape becomes desertic again with only a few succulent species (succulent belt; red colors).
-Interestingly, the spatial prediction clearly reveals that the bromeliad belt is interrupted which is a very interesting finding we would have not detected without the predictive mapping.
+Interestingly, the spatial prediction clearly reveals that the bromeliad belt is interrupted, a finding we would not have detected without the predictive mapping.
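The vegetation belts described above can also be made explicit in the raster itself. The following sketch cuts the predicted NMDS scores in `pred` into four classes with `terra::classify()`; the break values are invented for illustration and are not those underlying Figure \@ref(fig:rf-pred).

```{r, eval=FALSE}
library(terra)
# invented break values, purely illustrative
rcl = matrix(c(-Inf, -0.8, 1,  # 'Tillandsia' belt
               -0.8,    0, 2,  # herbaceous vegetation
                  0,  0.8, 3,  # bromeliad belt
                0.8,  Inf, 4), # succulent belt
             ncol = 3, byrow = TRUE)
belts = classify(pred, rcl = rcl)
plot(belts)
```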
 ## Conclusions
 
@@ -606,15 +605,16 @@ Since *lomas* mountains are heavily endangered, the prediction map can serve as
 In terms of methodology, a few additional points could be addressed:
 
 - It would be interesting to also model the second ordination\index{ordination} axis, and to subsequently find an innovative way of visualizing jointly the modeled scores of the two axes in one prediction map
-- If we were interested in interpreting the model in an ecologically meaningful way, we should probably use (semi-)parametric models [@muenchow_predictive_2013;@zuur_mixed_2009;@zuur_beginners_2017]/
+- If we were interested in interpreting the model in an ecologically meaningful way, we should probably use (semi-)parametric models [@muenchow_predictive_2013; @zuur_mixed_2009; @zuur_beginners_2017].
 However, there are at least approaches that help to interpret machine learning models such as random forests\index{random forest} (see, e.g., [https://mlr-org.com/posts/2018-04-30-interpretable-machine-learning-iml-and-mlr/](https://mlr-org.com/posts/2018-04-30-interpretable-machine-learning-iml-and-mlr/))
 - A sequential model-based optimization (SMBO) might be preferable to the random search for hyperparameter\index{hyperparameter} optimization used in this chapter [@probst_hyperparameters_2018]
 
-Finally, please note that random forest\index{random forest} and other machine learning\index{machine learning} models are frequently used in a setting with lots of observations and many predictors, much more than used in this chapter, and where it is unclear which variables and variable interactions contribute to explaining the response.
+Finally, please note that random forest\index{random forest} and other machine learning\index{machine learning} models are frequently used in a setting with many more observations and predictors than used in this chapter, and where it is unclear which variables and variable interactions contribute to explaining the response.
 Additionally, the relationships might be highly non-linear.
-In our use case, the relationship between response and predictors are pretty clear, there is only a slight amount of non-linearity and the number of observations and predictors is low.
+In our use case, the relationship between response and predictors is pretty clear, there is only a slight amount of non-linearity, and the number of observations and predictors is low.
 Hence, it might be worth trying a linear model\index{regression!linear}.
-A linear model is much easier to explain and understand than a random forest\index{random forest} model, and therefore to be preferred (law of parsimony), additionally it is computationally less demanding (see Exercises).
+A linear model is much easier to explain and understand than a random forest\index{random forest} model, and is therefore preferred (law of parsimony).
+Additionally, it is computationally less demanding (see Exercises).
 If the linear model cannot cope with the degree of non-linearity present in the data, one could also try a generalized additive model\index{generalized additive model} (GAM).
 The point here is that the toolbox of a data scientist consists of more than one tool, and it is your responsibility to select the tool best suited for the task or purpose at hand.
 Here, we wanted to introduce the reader to random forest\index{random forest} modeling and how to use the corresponding results for predictive mapping purposes.
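To make the parsimony argument concrete, a linear model (and, if needed, a GAM) can be fitted to the same data in a few lines. This is a sketch only, assuming `rp` still holds the response `sc` and the predictors `dem`, `ndvi`, `carea` and `cslope` used above; it is not part of the chapter's code.

```{r, eval=FALSE}
library(mgcv)
# baseline linear model on the same response and predictors
fit_lm = lm(sc ~ dem + ndvi + carea + cslope, data = rp)
summary(fit_lm)
# a GAM as a middle ground if the linear fit proves too rigid
fit_gam = gam(sc ~ s(dem) + s(ndvi) + s(carea) + s(cslope), data = rp)
summary(fit_gam)
```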
diff --git a/_02-ex.Rmd b/_02-ex.Rmd index c42b4785b..fe29d0fed 100644 --- a/_02-ex.Rmd +++ b/_02-ex.Rmd @@ -68,9 +68,9 @@ text(world_coords, world$iso_a2) # Alternative answer: nigeria = world[world$name_long == "Nigeria", ] africa = world[world$continent == "Africa", ] -plot(st_geometry(nigeria), col = "white", lwd = 3, main = "Nigeria in context", border = "lightgrey", expandBB = c(0.5, 0.2, 0.5, 0.2)) -plot(st_geometry(world), lty = 3, add = TRUE, border = "grey") -plot(st_geometry(nigeria), col = "yellow", add = TRUE, border = "darkgrey") +plot(st_geometry(nigeria), col = "white", lwd = 3, main = "Nigeria in context", border = "lightgray", expandBB = c(0.5, 0.2, 0.5, 0.2)) +plot(st_geometry(world), lty = 3, add = TRUE, border = "gray") +plot(st_geometry(nigeria), col = "yellow", add = TRUE, border = "darkgray") a = africa[grepl("Niger", africa$name_long), ] ncentre = st_centroid(a) ncentre_num = st_coordinates(ncentre) diff --git a/_04-ex.Rmd b/_04-ex.Rmd index 97a748267..558d6e210 100644 --- a/_04-ex.Rmd +++ b/_04-ex.Rmd @@ -74,13 +74,13 @@ The starting point of this exercise is to create an object representing Colorado ```{r 04-ex-4-1} colorado = us_states[us_states$NAME == "Colorado", ] plot(us_states$geometry) -plot(colorado$geometry, col = "grey", add = TRUE) +plot(colorado$geometry, col = "gray", add = TRUE) ``` ```{r 04-ex-4-2} intersects_with_colorado = us_states[colorado, , op = st_intersects] plot(us_states$geometry, main = "States that intersect with Colorado") -plot(intersects_with_colorado$geometry, col = "grey", add = TRUE) +plot(intersects_with_colorado$geometry, col = "gray", add = TRUE) ``` ```{r 04-ex-4-3} @@ -103,7 +103,7 @@ us_states |> ```{r 04-ex-4-4} touches_colorado = us_states[colorado, , op = st_touches] plot(us_states$geometry, main = "States that touch Colorado") -plot(touches_colorado$geometry, col = "grey", add = TRUE) +plot(touches_colorado$geometry, col = "gray", add = TRUE) ``` @@ -116,7 +116,7 @@ washington_to_cali = us_states |> states_crossed = us_states[washington_to_cali, , op = st_crosses] states_crossed$NAME plot(us_states$geometry, main = "States crossed by a straight line\n from the District of Columbia to central California") -plot(states_crossed$geometry, col = "grey", add = TRUE) +plot(states_crossed$geometry, col = "gray", add = TRUE) plot(washington_to_cali, add = TRUE) ``` diff --git a/_09-ex.Rmd b/_09-ex.Rmd index 13429d177..b4212baf8 100644 --- a/_09-ex.Rmd +++ b/_09-ex.Rmd @@ -178,9 +178,9 @@ tmap_animation(tma1, filename = "tma2.gif", width = 1000, height = 1000) browseURL("tma1.gif") tma2 = tm_shape(africa) + - tm_polygons(fill = "lightgrey") + + tm_polygons(fill = "lightgray") + tm_shape(ea) + - tm_polygons(fill = "darkgrey") + + tm_polygons(fill = "darkgray") + tm_shape(ea) + tm_polygons(fill = "HDI") + tm_facets(by = "name", nrow = 1, ncol = 1) diff --git a/_14-ex.Rmd b/_14-ex.Rmd index aed39aefe..5aac9a907 100644 --- a/_14-ex.Rmd +++ b/_14-ex.Rmd @@ -13,7 +13,7 @@ library(spDataLarge) E1. Download the csv file containing inhabitant information for a 100 m cell resolution (https://www.zensus2011.de/SharedDocs/Downloads/DE/Pressemitteilung/DemografischeGrunddaten/csv_Bevoelkerung_100m_Gitter.zip?__blob=publicationFile&v=3). Please note that the unzipped file has a size of 1.23 GB. -To read it into R you can use `readr::read_csv`. +To read it into R, you can use `readr::read_csv`. This takes 30 seconds on a machine with 16 GB RAM. `data.table::fread()` might be even faster, and returns an object of class `data.table()`. 
Use `dplyr::as_tibble()` to convert it into a tibble. diff --git a/_15-ex.Rmd b/_15-ex.Rmd index f036fb5e0..13ac827da 100644 --- a/_15-ex.Rmd +++ b/_15-ex.Rmd @@ -129,7 +129,7 @@ E3. Retrieve the bias-reduced RMSE of a random forest\index{random forest} and a The random forest modeling should include the estimation of optimal hyperparameter\index{hyperparameter} combinations (random search with 50 iterations) in an inner tuning loop. Parallelize\index{parallelization} the tuning level. Report the mean RMSE\index{RMSE} and use a boxplot to visualize all retrieved RMSEs. -Please not that this exercise is best solved using the mlr3 functions `benchmark_grid()` and `benchmark()` (see https://mlr3book.mlr-org.com/perf-eval-cmp.html#benchmarking for more information). +Please note that this exercise is best solved using the mlr3 functions `benchmark_grid()` and `benchmark()` (see https://mlr3book.mlr-org.com/perf-eval-cmp.html#benchmarking for more information). ```{r 15-ex-e3, message=FALSE, eval=FALSE} library(dplyr) diff --git a/_404.Rmd b/_404.Rmd index 0eb2e497f..23ed92b89 100644 --- a/_404.Rmd +++ b/_404.Rmd @@ -7,9 +7,9 @@ null_island = st_point(c(0, 0)) null_island = st_sfc(null_island, crs = 4326) null_island = st_sf(name = "Null Island", geom = null_island) tm_shape(null_island) + - tm_graticules(labels.col = "grey40") + + tm_graticules(labels.col = "gray40") + tm_text("name", size = 5, fontface = "italic") + tm_layout(bg.color = "lightblue") + - tm_title("You are here:", color = "grey40") + tm_title("You are here:", color = "gray40") ``` diff --git a/code/02-contpop.R b/code/02-contpop.R index 9754c5293..04526f261 100644 --- a/code/02-contpop.R +++ b/code/02-contpop.R @@ -6,6 +6,6 @@ par(mar = c(0, 0, 0, 0)) plot(world_proj["continent"], reset = FALSE, main = "", key.pos = NULL) g = st_graticule() g = st_transform(g, crs = "+proj=eck4") -plot(g$geometry, add = TRUE, col = "lightgrey") +plot(g$geometry, add = TRUE, col = "lightgray") cex = sqrt(world$pop) / 10000 plot(st_geometry(world_cents), add = TRUE, cex = cex, lwd = 2, graticule = TRUE) diff --git a/code/02-vectorplots.R b/code/02-vectorplots.R index b2094a311..4471e02d8 100644 --- a/code/02-vectorplots.R +++ b/code/02-vectorplots.R @@ -14,7 +14,7 @@ london_orign = rbind(london_osgb, origin_osgb) png("images/vector_lonlat.png") globe::globeearth(eye = c(0, 0)) gratmat = st_coordinates(st_graticule())[, 1:2] -globe::globelines(loc = gratmat, col = "grey", lty = 3) +globe::globelines(loc = gratmat, col = "gray", lty = 3) globe::globelines(loc = matrix(c(-90, 90, 0, 0), ncol = 2)) globe::globelines(loc = matrix(c(0, 0, -90, 90), ncol = 2)) globe::globepoints(loc = c(-0.1, 51.5), pch = 4, cex = 2, lwd = 3, col = "red") @@ -28,6 +28,6 @@ uk = rnaturalearth::ne_countries(scale = 50) %>% plot(uk$geometry) plot(london_orign$geometry[1], add = TRUE, pch = 4, cex = 2, lwd = 3, col = "red") plot(london_orign$geometry[2], add = TRUE, pch = 1, cex = 2, lwd = 3, col = "blue") -abline(h = seq(0, 9e5, length.out = 10), col = "grey", lty = 3) -abline(v = seq(0, 9e5, length.out = 10), col = "grey", lty = 3) +abline(h = seq(0, 9e5, length.out = 10), col = "gray", lty = 3) +abline(v = seq(0, 9e5, length.out = 10), col = "gray", lty = 3) dev.off() diff --git a/code/05-venn-clip.R b/code/05-venn-clip.R index 950dfdd0a..f70327ec0 100644 --- a/code/05-venn-clip.R +++ b/code/05-venn-clip.R @@ -9,33 +9,33 @@ if (!exists("b")) { } old_par = par(mfrow = c(2, 3), mai = c(0.1, 0.1, 0.1, 0.1)) -plot(b, border = "grey") -plot(x, add = TRUE, col = "lightgrey", 
border = "grey") +plot(b, border = "gray") +plot(x, add = TRUE, col = "lightgray", border = "gray") text(cex = 1.8, x = 0.5, y = 1, "x") -plot(b, add = TRUE, border = "grey") +plot(b, add = TRUE, border = "gray") x_not_y = st_difference(x, y) -plot(b, border = "grey") -plot(x_not_y, col = "lightgrey", add = TRUE, border = "grey") +plot(b, border = "gray") +plot(x_not_y, col = "lightgray", add = TRUE, border = "gray") text(cex = 1.8, x = 0.5, y = 1, "st_difference(x, y)") y_not_x = st_difference(y, x) -plot(b, border = "grey") -plot(y_not_x, col = "lightgrey", add = TRUE, border = "grey") +plot(b, border = "gray") +plot(y_not_x, col = "lightgray", add = TRUE, border = "gray") text(cex = 1.8, x = 0.5, y = 1, "st_difference(y, x)") x_or_y = st_union(x, y) -plot(x_or_y, col = "lightgrey", border = "grey") +plot(x_or_y, col = "lightgray", border = "gray") text(cex = 1.8, x = 0.5, y = 1, "st_union(x, y)") x_and_y = st_intersection(x, y) -plot(b, border = "grey") -plot(x_and_y, col = "lightgrey", add = TRUE, border = "grey") +plot(b, border = "gray") +plot(x_and_y, col = "lightgray", add = TRUE, border = "gray") text(cex = 1.8, x = 0.5, y = 1, "st_intersection(x, y)") # x_xor_y = st_difference(x_xor_y, x_and_y) # failing x_xor_y = st_sym_difference(x, y) -plot(x_xor_y, col = "lightgrey", border = "grey") +plot(x_xor_y, col = "lightgray", border = "gray") text(cex = 1.8, x = 0.5, y = 1, "st_sym_difference(x, y)") # plot.new() -# plot(b, border = "grey") -# plot(y, col = "lightgrey", add = TRUE, border = "grey") -# plot(b, add = TRUE, border = "grey") +# plot(b, border = "gray") +# plot(y, col = "lightgray", add = TRUE, border = "gray") +# plot(b, add = TRUE, border = "gray") # text(cex = 1.2, x = 0.5, y = 1, "y") par(old_par) diff --git a/code/13-cycleways.R b/code/13-cycleways.R index 4596be014..b62ecd7f3 100644 --- a/code/13-cycleways.R +++ b/code/13-cycleways.R @@ -6,7 +6,7 @@ bristol_stations_top = bristol_stations[desire_rail, , op = st_is_within_distanc m_leaflet = tm_shape(bristol_ttwa) + tm_borders(col = "darkblue") + tm_shape(bristol_ways) + - tm_lines(col = "highway", lwd = 3, palette = c("lightgreen", "grey", "pink")) + + tm_lines(col = "highway", lwd = 3, palette = c("lightgreen", "gray", "pink")) + tm_scalebar() + tm_shape(route_cycleway) + tm_lines(col = "blue", lwd = "all", scale = 20, alpha = 0.6) + diff --git a/code/chapters/02-spatial-data.R b/code/chapters/02-spatial-data.R index 12638fc8c..44f6651e4 100644 --- a/code/chapters/02-spatial-data.R +++ b/code/chapters/02-spatial-data.R @@ -419,14 +419,14 @@ library(tmap) tm1 = tm_shape(india_buffer_with_s2) + tm_fill(col = hcl.colors(4, palette = "purple green")[3]) + tm_shape(india) + - tm_fill(col = "grey95") + + tm_fill(col = "gray95") + tm_layout(main.title = "st_buffer() with dist = 1", title = "s2 switched on (default)") tm2 = tm_shape(india_buffer_without_s2) + tm_fill(col = hcl.colors(4, palette = "purple green")[3]) + tm_shape(india) + - tm_fill(col = "grey95") + + tm_fill(col = "gray95") + tm_layout(main.title = " ", title = "s2 switched off") diff --git a/code/chapters/04-spatial-operations.R b/code/chapters/04-spatial-operations.R index f8b18f382..43b684899 100644 --- a/code/chapters/04-spatial-operations.R +++ b/code/chapters/04-spatial-operations.R @@ -368,7 +368,7 @@ plot(grid, col = grid_sf$rooks) tm_shape(grid_sf) + tm_fill(col = c("queens", "rooks"), palette = c("white", "black")) + tm_shape(grid_sf) + - tm_borders(col = "grey", lwd = 2) + + tm_borders(col = "gray", lwd = 2) + tm_layout(frame = FALSE, legend.show = 
FALSE, panel.labels = c("queen", "rook"))
diff --git a/code/chapters/05-geometry-operations.R b/code/chapters/05-geometry-operations.R
index d67cb12d2..f9e0bac05 100644
--- a/code/chapters/05-geometry-operations.R
+++ b/code/chapters/05-geometry-operations.R
@@ -171,7 +171,7 @@ nz_scale_sf = st_set_geometry(nz, nz_scale)
 ## ----points, fig.cap="Overlapping circles.", fig.asp=0.4, crop = TRUE-------------------------------
 b = st_sfc(st_point(c(0, 1)), st_point(c(1, 1))) # create 2 points
 b = st_buffer(b, dist = 1) # convert points to circles
-plot(b, border = "grey")
+plot(b, border = "gray")
 text(x = c(-0.5, 1.5), y = 1, labels = c("x", "y"), cex = 3) # add text
 
 
@@ -179,8 +179,8 @@ text(x = c(-0.5, 1.5), y = 1, labels = c("x", "y"), cex = 3) # add text
 x = b[1]
 y = b[2]
 x_and_y = st_intersection(x, y)
-plot(b, border = "grey")
-plot(x_and_y, col = "lightgrey", border = "grey", add = TRUE) # intersecting area
+plot(b, border = "gray")
+plot(x_and_y, col = "lightgray", border = "gray", add = TRUE) # intersecting area
 
 
 ## ----venn-clip, echo=FALSE, fig.cap="Spatial equivalents of logical operators.", warning=FALSE------
@@ -194,9 +194,9 @@ box = st_as_sfc(bb)
 set.seed(2017)
 p = st_sample(x = box, size = 10)
 p_xy1 = p[x_and_y]
-plot(box, border = "grey", lty = 2)
-plot(x, add = TRUE, border = "grey")
-plot(y, add = TRUE, border = "grey")
+plot(box, border = "gray", lty = 2)
+plot(x, add = TRUE, border = "gray")
+plot(y, add = TRUE, border = "gray")
 plot(p, add = TRUE)
 plot(p_xy1, cex = 3, col = "red", add = TRUE)
 text(x = c(-0.5, 1.5), y = 1, labels = c("x", "y"), cex = 2)
diff --git a/code/chapters/10-gis.R b/code/chapters/10-gis.R
index 02bb9d55f..73dce22db 100644
--- a/code/chapters/10-gis.R
+++ b/code/chapters/10-gis.R
@@ -63,12 +63,12 @@ aggzone_wgs = st_transform(aggregating_zones, "EPSG:4326")
 ## ----uniondata, echo=FALSE, fig.cap="Illustration of two areal units: incongruent (black lines) and aggregating zones (red borders). "----
 library(tmap)
 tm_shape(incongr_wgs) +
-  tm_polygons(border.col = "grey5") +
+  tm_polygons(border.col = "gray5") +
   tm_shape(aggzone_wgs) +
   tm_borders(alpha = 0.5, col = "red") +
   tm_add_legend(type = "line",
                 labels = c("incongr_wgs", "aggzone_wgs"),
-                col = c("grey5", "red"),
+                col = c("gray5", "red"),
                 lwd = 3) +
   tm_scale_bar(position = c("left", "bottom"),
                breaks = c(0, 0.5, 1)) +
diff --git a/code/chapters/_02-ex.R b/code/chapters/_02-ex.R
index f3f02ae04..0ce27242f 100644
--- a/code/chapters/_02-ex.R
+++ b/code/chapters/_02-ex.R
@@ -49,9 +49,9 @@ text(world_coords, world$iso_a2)
 # Alternative answer:
 nigeria = world[world$name_long == "Nigeria", ]
 africa = world[world$continent == "Africa", ]
-plot(st_geometry(nigeria), col = "white", lwd = 3, main = "Nigeria in context", border = "lightgrey", expandBB = c(0.5, 0.2, 0.5, 0.2))
-plot(st_geometry(world), lty = 3, add = TRUE, border = "grey")
-plot(st_geometry(nigeria), col = "yellow", add = TRUE, border = "darkgrey")
+plot(st_geometry(nigeria), col = "white", lwd = 3, main = "Nigeria in context", border = "lightgray", expandBB = c(0.5, 0.2, 0.5, 0.2))
+plot(st_geometry(world), lty = 3, add = TRUE, border = "gray")
+plot(st_geometry(nigeria), col = "yellow", add = TRUE, border = "darkgray")
 a = africa[grepl("Niger", africa$name_long), ]
 ncentre = st_centroid(a)
 ncentre_num = st_coordinates(ncentre)
diff --git a/code/chapters/_04-ex.R b/code/chapters/_04-ex.R
index 84fb9dc0f..fc7ec28d0 100644
--- a/code/chapters/_04-ex.R
+++ b/code/chapters/_04-ex.R
@@ -57,13 +57,13 @@ nz_height_combined |>
 ## ----04-ex-4-1--------------------------------------------------------------------------------------
 colorado = us_states[us_states$NAME == "Colorado", ]
 plot(us_states$geometry)
-plot(colorado$geometry, col = "grey", add = TRUE)
+plot(colorado$geometry, col = "gray", add = TRUE)
 
 
 ## ----04-ex-4-2--------------------------------------------------------------------------------------
 intersects_with_colorado = us_states[colorado, , op = st_intersects]
 plot(us_states$geometry, main = "States that intersect with Colorado")
-plot(intersects_with_colorado$geometry, col = "grey", add = TRUE)
+plot(intersects_with_colorado$geometry, col = "gray", add = TRUE)
 
 
 ## ----04-ex-4-3--------------------------------------------------------------------------------------
@@ -86,7 +86,7 @@ us_states |>
 ## ----04-ex-4-4--------------------------------------------------------------------------------------
 touches_colorado = us_states[colorado, , op = st_touches]
 plot(us_states$geometry, main = "States that touch Colorado")
-plot(touches_colorado$geometry, col = "grey", add = TRUE)
+plot(touches_colorado$geometry, col = "gray", add = TRUE)
 
 
 ## ----04-ex-4-5--------------------------------------------------------------------------------------
@@ -98,7 +98,7 @@ washington_to_cali = us_states |>
 states_crossed = us_states[washington_to_cali, , op = st_crosses]
 states_crossed$NAME
 plot(us_states$geometry, main = "States crossed by a straight line\n from the District of Columbia to central California")
-plot(states_crossed$geometry, col = "grey", add = TRUE)
+plot(states_crossed$geometry, col = "gray", add = TRUE)
 plot(washington_to_cali, add = TRUE)
 
 
diff --git a/code/chapters/_404.R b/code/chapters/_404.R
index ded22afab..2cffbb7bd 100644
--- a/code/chapters/_404.R
+++ b/code/chapters/_404.R
@@ -5,9 +5,9 @@ null_island = st_point(c(0, 0))
 null_island = st_sfc(null_island, crs = 4326)
 null_island = st_sf(name = "Null Island", geom = null_island)
 tm_shape(null_island) +
-  tm_graticules(labels.col = "grey40") +
+  tm_graticules(labels.col = "gray40") +
   tm_text("name", size = 5, fontface = "italic") +
   tm_layout(bg.color = "lightblue", main.title = "You are here:",
-            main.title.color = "grey40")
+            main.title.color = "gray40")
 
 
diff --git a/code/front_cover2.R b/code/front_cover2.R
index f8895a80f..8ac08c20c 100644
--- a/code/front_cover2.R
+++ b/code/front_cover2.R
@@ -294,7 +294,7 @@ col_lv4 = alpha(pal[12], alp)
 
 col_lv5 = alpha(pal[15], alp)
 col_world = "#073B4C"
-col_borders = "grey80"
+col_borders = "gray80"
 col_back = "#1D201F"
 
 pal_dis = moma.colors("ustwo" , n = 5, type = "discrete")
diff --git a/code/hex_sticker.R b/code/hex_sticker.R
index 0c1882cd0..bb273bbf3 100644
--- a/code/hex_sticker.R
+++ b/code/hex_sticker.R
@@ -68,7 +68,7 @@ sticker(
 #   y = st_geometry(y) %>%
 #     st_transform(st_crs("+proj=laea +y_0=0 +lon_0=0 +lat_0=0 +ellps=WGS84 +no_defs"))
 #   y = y + c(lon_0, 0)
-#   plot(st_geometry(x), col = "grey")
+#   plot(st_geometry(x), col = "gray")
 #   plot(st_geometry(y), add = TRUE, col = "blue")
 # }
 # 
diff --git a/code/old-to-future-remove/10-earthquakes.R b/code/old-to-future-remove/10-earthquakes.R
index fb0036b64..cf3a5e935 100644
--- a/code/old-to-future-remove/10-earthquakes.R
+++ b/code/old-to-future-remove/10-earthquakes.R
@@ -17,7 +17,7 @@ print(paste0(nrow(earthquakes), " significant earthquakes happened last month"))
 
 
 # map ---------------------------------------------------------------------
-plot(st_geometry(world), border = "grey")
+plot(st_geometry(world), border = "gray")
 plot(st_geometry(earthquakes), cex = earthquakes$mag, add = TRUE)
 title(paste0(
   "Location of significant (mag > 5) Earthquakes in the month to ",
diff --git a/code/sf-classes.R b/code/sf-classes.R
index 7a153eed1..f846cc98b 100644
--- a/code/sf-classes.R
+++ b/code/sf-classes.R
@@ -68,7 +68,7 @@ p_linestring_sf = ggplot() +
         axis.title = element_blank())
 
 p_polygon_sf = ggplot() +
-  geom_sf(data = polygon_sf, fill = "grey") +
+  geom_sf(data = polygon_sf, fill = "gray") +
   labs(title = "POLYGON") +
   coord_sf(xlim = c(0.6, 1.6), ylim = c(0.4, 1.4)) +
   theme(line = element_blank(),
@@ -92,7 +92,7 @@ p_multilinestring_sf = ggplot() +
         axis.title = element_blank())
 
 p_multipolygon_sf = ggplot() +
-  geom_sf(data = multipolygon_sf, fill = "grey") +
+  geom_sf(data = multipolygon_sf, fill = "gray") +
   labs(title = "MULTIPOLYGON") +
   coord_sf(xlim = c(0.6, 1.6), ylim = c(0.4, 1.4)) +
   theme(line = element_blank(),
@@ -102,10 +102,10 @@ p_multipolygon_sf = ggplot() +
 p_geometrycollection_sf = ggplot() +
   geom_sf(data = point_sf) +
   geom_sf(data = linestring_sf) +
-  geom_sf(data = polygon_sf, fill = "grey") +
+  geom_sf(data = polygon_sf, fill = "gray") +
   geom_sf(data = multipoint_sf) +
   geom_sf(data = multilinestring_sf) +
-  geom_sf(data = multipolygon_sf, fill = "grey") +
+  geom_sf(data = multipolygon_sf, fill = "gray") +
   labs(title = "GEOMETRYCOLLECTION") +
   coord_sf(xlim = c(0.6, 1.6), ylim = c(0.4, 1.4)) +
   theme(
@@ -125,7 +125,7 @@ mygb = function(x, y) {
   grid.bezier(
     x = x,
     y = y,
-    gp = gpar(col = "grey", fill = "grey"),
+    gp = gpar(col = "gray", fill = "gray"),
     arrow = arrow(type = "closed", length = unit(2, "mm"))
   )
 }
diff --git a/geocompr.bib b/geocompr.bib
index 5a4046d01..9ff8dd28c 100644
--- a/geocompr.bib
+++ b/geocompr.bib
@@ -152,7 +152,6 @@ @book{bivand_applied_2013
   title = {Applied Spatial Data Analysis with {{R}}},
   author = {Bivand, Roger and Pebesma, Edzer and {G{\'o}mez-Rubio}, Virgilio},
   year = {2013},
-  volume = {747248717},
   publisher = {Springer},
   googlebooks = {v0eIU9ObJXgC},
   keywords = {Mathematics / Probability & Statistics / General,Medical / Biostatistics,Medical / General,Science / Earth Sciences / Geography,Science / Environmental Science,Technology & Engineering / Environmental / General}
@@ -361,9 +360,10 @@ @article{breiman_random_2001
 }
 
 @book{brenning_arcgis_2012,
-  title = {{{ArcGIS Geoprocessing}} in {{R}} via {{Python}}},
+  title = {{{RPyGeo}}: {{ArcGIS Geoprocessing}} in {{R}} via {{Python}}},
   author = {Brenning, Alexander},
   year = {2012},
+  publisher = {R package},
   keywords = {nosource}
 }
 
@@ -542,7 +542,7 @@ @article{conrad_system_2015
 }
 
 @book{cooley_sfheaders_2020,
-  title = {Sfheaders: {{Converts}} between \{\vphantom\}{{R}}\vphantom\{\} Objects and Simple Feature Objects},
+  title = {Sfheaders: {{Converts}} between {{R}} Objects and Simple Feature Objects},
   author = {Cooley, David},
   year = {2020},
   publisher = {R package}
@@ -632,7 +632,7 @@ @article{douglas_algorithms_1973
 }
 
 @book{dunnington_ggspatial_2021,
-  title = {Ggspatial: {{Spatial}} Data Framework for \{ggplot2\}},
+  title = {{{ggspatial}}: Spatial Data Framework for {{ggplot2}}},
   author = {Dunnington, Dewey},
   year = {2021},
   publisher = {R package}
@@ -721,7 +721,7 @@ @book{gillespie_efficient_2016
 }
 
 @book{giraud_mapsf_2021,
-  title = {\{mapsf\}: {{Thematic}} Cartography},
+  title = {Mapsf: {{Thematic}} Cartography},
   author = {Giraud, Timoth{\'e}e},
   year = {2021},
   publisher = {R package}
@@ -897,7 +897,7 @@ @article{hickman_transitions_2011
 }
 
 @book{hijmans_geosphere_2016,
-  title = {\{geosphere\}: {{Spherical Trigonometry}}},
+  title = {Geosphere: {{Spherical Trigonometry}}},
   author = {Hijmans, Robert J.},
   year = {2016},
   publisher = {R package},
@@ -1134,7 +1134,7 @@ @book{kuhn_applied_2013
 }
 
 @book{lahn_openeo_2021,
-  title = {\{openeo\}: {{Client}} Interface for '{{openEO}}' Servers},
+  title = {Openeo: {{Client}} Interface for '{{openEO}}' Servers},
   author = {Lahn, Florian},
   year = {2021},
   publisher = {R package}
@@ -1342,19 +1342,21 @@ @article{lovelace_jittering_2022b
 }
 
 @article{lovelace_open_2021,
-  ids = {lovelace_open_2021a},
   title = {Open Source Tools for Geographic Analysis in Transport Planning},
   author = {Lovelace, Robin},
   year = {2021},
-  month = jan,
+  month = oct,
  journal = {Journal of Geographical Systems},
   volume = {23},
-  publisher = {{Springer Science and Business Media LLC}},
+  number = {4},
+  pages = {547--578},
   issn = {1435-5949},
-  doi = {10/ghtnrp},
-  urldate = {2021-01-17},
+  doi = {10.1007/s10109-020-00342-2},
+  urldate = {2024-10-06},
   abstract = {Geographic analysis has long supported transport plans that are appropriate to local contexts. Many incumbent `tools of the trade' are proprietary and were developed to support growth in motor traffic, limiting their utility for transport planners who have been tasked with twenty-first century objectives such as enabling citizen participation, reducing pollution, and increasing levels of physical activity by getting more people walking and cycling. Geographic techniques---such as route analysis, network editing, localised impact assessment and interactive map visualisation---have great potential to support modern transport planning priorities. The aim of this paper is to explore emerging open source tools for geographic analysis in transport planning, with reference to the literature and a review of open source tools that are already being used. A key finding is that a growing number of options exist, challenging the current landscape of proprietary tools. These can be classified as command-line interface, graphical user interface or web-based user interface tools and by the framework in which they were implemented, with numerous tools released as R, Python and JavaScript packages, and QGIS plugins. The review found a diverse and rapidly evolving `ecosystem' tools, with 25 tools that were designed for geographic analysis to support transport planning outlined in terms of their popularity and functionality based on online documentation. They ranged in size from single-purpose tools such as the QGIS plugin AwaP to sophisticated stand-alone multi-modal traffic simulation software such as MATSim, SUMO and Veins. Building on their ability to re-use the most effective components from other open source projects, developers of open source transport planning tools can avoid `reinventing the wheel' and focus on innovation, the `gamified' A/B Street https://github.com/dabreegster/abstreet/\#abstreetsimulation software, based on OpenStreetMap, a case in point. The paper, the source code of which can be found at https://github.com/robinlovelace/open-gat, concludes that, although many of the tools reviewed are still evolving and further research is needed to understand their relative strengths and barriers to uptake, open source tools for geographic analysis in transport planning already hold great potential to help generate the strategic visions of change and evidence that is needed by transport planners in the twenty-first century.},
-  langid = {english}
+  copyright = {CC0 1.0 Universal Public Domain Dedication},
+  langid = {english},
+  keywords = {Artificial Intelligence,C6,Geographic data,Geographic data analysis,Open source,R41,Software,Transport modelling,Transport planning}
 }
 
 @article{lovelace_propensity_2017,
@@ -1387,6 +1389,7 @@ @book{majure_sgeostat_2016
   title = {Sgeostat: {{An Object-Oriented Framework}} for {{Geostatistical Modeling}} in {{S}}+},
   author = {Majure, James J. and Gebhardt, Albrecht},
   year = {2016},
+  publisher = {R package},
   keywords = {nosource}
 }
 
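
Most of the hunks above swap the British spelling "grey" for "gray" in color arguments. As a quick check outside the patch itself, base R registers both spellings (including the numbered variants) as the same colors, so these substitutions should not change any rendered figure. A minimal sketch assuming only base R (the color names below are the ones that appear in the hunks above):

```r
# col2rgb() from grDevices (attached by default) resolves color names to RGB
# triplets; element-wise equality of the two spellings shows the swap is
# cosmetic only. Expected result: TRUE.
old_names = c("grey", "grey5", "grey40", "grey80", "lightgrey", "darkgrey")
new_names = sub("grey", "gray", old_names)
all(col2rgb(old_names) == col2rgb(new_names))
```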