Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Progress #1

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
^docs$
^pkgdown$
^\.github$
^private$
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ vignettes/*.pdf

# pkgdown
docs

# private folder for draft test scripts
private
35 changes: 25 additions & 10 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,33 @@ Package: cartogram
Title: Create Cartograms with R
Version: 0.3.0
Authors@R: c(
person("Sebastian", "Jeworutzki", email = "sebastian.jeworutzki@ruhr-uni-bochum.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2671-5253")),
person("Timothee", "Giraud", role = "ctb"),
person("Nicolas", "Lambert", role = "ctb"),
person("Roger", "Bivand", role = c("cph"), email = "Roger.Bivand@nhh.no"),
person("Edzer", "Pebesma", role = "cph"),
person("Jakub", "Nowosad", email = "nowosad.jakub@gmail.com", role = "ctb", comment = c(ORCID = "0000-0002-1057-3721"))
person("Sebastian", "Jeworutzki", , "sebastian.jeworutzki@ruhr-uni-bochum.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2671-5253")),
person("Timothee", "Giraud", role = "ctb"),
person("Nicolas", "Lambert", role = "ctb"),
person("Roger", "Bivand", , "Roger.Bivand@nhh.no", role = "cph"),
person("Edzer", "Pebesma", role = "cph"),
person("Jakub", "Nowosad", , "nowosad.jakub@gmail.com", role = "ctb",
comment = c(ORCID = "0000-0002-1057-3721")),
person("Egor", "Kotov", , "kotov.egor@gmail.com", role = "ctb",
comment = c(ORCID = "0000-0001-6690-5345"))
)
Description: Construct continuous and non-contiguous area cartograms.
URL: https://github.com/sjewo/cartogram, https://sjewo.github.io/cartogram/
License: GPL-3
URL: https://github.com/sjewo/cartogram,
https://sjewo.github.io/cartogram/
BugReports: https://github.com/sjewo/cartogram/issues
Imports: methods, sf, packcircles
Imports:
methods,
packcircles,
rlang,
sf
Suggests:
License: GPL-3
future,
future.apply,
parallelly,
progressr,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
211 changes: 170 additions & 41 deletions R/cartogram_cont.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,18 @@
#' "none", don't adjust weighting values
#' @param threshold Define threshold for data preparation
#' @param verbose print meanSizeError on each iteration
#' @param n_cpu Number of cores to use. Defaults to "respect_future_plan". Available options are:
#' * "respect_future_plan" - By default, the function will run on a single core, unless the user specifies the number of cores using \code{\link[future]{plan}} (e.g. `future::plan(future::multisession, workers = 4)`) before running the `cartogram_cont` function.
#' * "auto" - Use all except available cores (identified with \code{\link[parallelly]{availableCores}}) except 1, to keep the system responsive.
#' * a `numeric` value - Use the specified number of cores. In this case `cartogram_cont` will use set the specified number of cores internally with `future::plan(future::multisession, workers = n_cpu)` and revert that back by switching the plan back to whichever plan might have been set before by the user. If only 1 core is set, the function will not require `future` and `future.apply` and will run on a single core.
#' @param show_progress A `logical` value. If TRUE, show progress bar. Defaults to TRUE.
#' @return An object of the same class as x
#' @export
#' @importFrom methods is slot
#' @importFrom stats quantile
#' @importFrom sf st_area st_as_sf st_centroid st_coordinates st_distance st_geometry st_geometry<- st_point st_crs st_crs<-
#' @examples
#'# ========= Basic example =========
#'library(sf)
#'library(cartogram)
#'
Expand All @@ -51,9 +57,62 @@
#'plot(nc[,"BIR74"], main="original", key.pos = NULL, reset = FALSE)
#'plot(nc_utm_carto[,"BIR74"], main="distorted", key.pos = NULL, reset = FALSE)
#'
#'
#'# ========= Advanced example 1 =========
#'# Faster cartogram using multiple CPU cores
#'# using n_cpu parameter
#'library(sf)
#'library(cartogram)
#'
# Load North Carolina SIDS data
#'nc = st_read(system.file("shape/nc.shp", package="sf"), quiet = TRUE)
#'
#'# transform to NAD83 / UTM zone 16N
#'nc_utm <- st_transform(nc, 26916)
#'
#'# Create cartogram using 2 CPU cores on local machine
#'nc_utm_carto <- cartogram_cont(nc_utm, weight = "BIR74", itermax = 5,
#' n_cpu = 2)
#'
#'# Plot
#'par(mfrow=c(2,1))
#'plot(nc[,"BIR74"], main="original", key.pos = NULL, reset = FALSE)
#'plot(nc_utm_carto[,"BIR74"], main="distorted", key.pos = NULL, reset = FALSE)
#'
#'
#'# ========= Advanced example 2 =========
#'# Faster cartogram using multiple CPU cores
#'# using future package plan
#'library(sf)
#'library(cartogram)
#'library(future)
#'
# Load North Carolina SIDS data
#'nc = st_read(system.file("shape/nc.shp", package="sf"), quiet = TRUE)
#'
#'# transform to NAD83 / UTM zone 16N
#'nc_utm <- st_transform(nc, 26916)
#'
#'# Set the future plan with 2 CPU local cores
#'# You can of course use any other plans, not just multisession
#'future::plan(future::multisession, workers = 2)
#'
#'# Create cartogram with multiple CPU cores
#'# The cartogram_cont() will respect the plan set above
#'nc_utm_carto <- cartogram_cont(nc_utm, weight = "BIR74", itermax = 5)
#'
#'# Shutdown the R processes that were created by the future plan
#'future::plan(future::sequential)
#'
#'# Plot
#'par(mfrow=c(2,1))
#'plot(nc[,"BIR74"], main="original", key.pos = NULL, reset = FALSE)
#'plot(nc_utm_carto[,"BIR74"], main="distorted", key.pos = NULL, reset = FALSE)
#'
#' @references Dougenik, J. A., Chrisman, N. R., & Niemeyer, D. R. (1985). An Algorithm To Construct Continuous Area Cartograms. In The Professional Geographer, 37(1), 75-81.
cartogram_cont <- function(x, weight, itermax=15, maxSizeError=1.0001,
prepare="adjust", threshold=0.05, verbose = FALSE) {
prepare="adjust", threshold=0.05, verbose = FALSE,
n_cpu="respect_future_plan", show_progress=TRUE) {
UseMethod("cartogram_cont")
}

Expand All @@ -73,21 +132,61 @@ cartogram <- function(shp, ...) {
#' @importFrom sf st_as_sf
#' @export
cartogram_cont.SpatialPolygonsDataFrame <- function(x, weight, itermax=15, maxSizeError=1.0001,
prepare="adjust", threshold=0.05, verbose = FALSE) {
prepare="adjust", threshold=0.05, verbose = FALSE,
n_cpu="respect_future_plan", show_progress=TRUE) {
as(cartogram_cont.sf(sf::st_as_sf(x), weight, itermax=itermax, maxSizeError=maxSizeError,
prepare=prepare, threshold=threshold, verbose=verbose), 'Spatial')
prepare=prepare, threshold=threshold, verbose=verbose, n_cpu=n_cpu, show_progress=show_progress), 'Spatial')

}

#' @rdname cartogram_cont
#' @importFrom sf st_area st_geometry st_geometry_type st_centroid st_crs st_coordinates st_buffer st_is_longlat
#' @export
cartogram_cont.sf <- function(x, weight, itermax = 15, maxSizeError = 1.0001,
prepare = "adjust", threshold = 0.05, verbose = FALSE) {
prepare = "adjust", threshold = 0.05, verbose = FALSE, n_cpu="respect_future_plan", show_progress=TRUE) {

if (isTRUE(sf::st_is_longlat(x))) {
stop('Using an unprojected map. This function does not give correct centroids and distances for longitude/latitude data:\nUse "st_transform()" to transform coordinates to another projection.', call. = F)
}

# Check n_cpu parameter and set up parallel processing
if(length(n_cpu) > 1) {
stop('Invalid value for `n_cpu`. Use "respect_future_plan", "auto", or a numeric value.', call. = FALSE)
}

# Determine if we should use multithreading
if (is.numeric(n_cpu) & n_cpu == 1) {
multithreadded <- FALSE
} else if (is.numeric(n_cpu) & n_cpu > 1) {
cartogram_assert_package(c("future", "future.apply"))
original_plan <- future::plan(future::multisession, workers = n_cpu)
on.exit(future::plan(original_plan), add = TRUE)
multithreadded <- TRUE
} else if (n_cpu == "auto") {
cartogram_assert_package("parallelly")
n_cpu <- max(parallelly::availableCores() - 1, 1)
if (n_cpu == 1) {
multithreadded <- FALSE
} else if (n_cpu > 1) {
cartogram_assert_package(c("future", "future.apply"))
original_plan <- future::plan(future::multisession, workers = n_cpu)
on.exit(future::plan(original_plan), add = TRUE)
multithreadded <- TRUE
}
} else if (n_cpu == "respect_future_plan") {
if (rlang::is_installed("future")) {
if (is(future::plan(), "sequential")) {
multithreadded <- FALSE
} else {
multithreadded <- TRUE
}
} else {
multithreadded <- FALSE
}
} else {
stop('Invalid value for `n_cpu`. Use "respect_future_plan", "auto", or a numeric value.', call. = FALSE)
}

# prepare data
value <- x[[weight]]

Expand Down Expand Up @@ -168,45 +267,75 @@ cartogram_cont.sf <- function(x, weight, itermax = 15, maxSizeError = 1.0001,
if(verbose)
message(paste0("Mean size error for iteration ", z , ": ", meanSizeError))

for (i in seq_len(nrow(x.iter))) {
pts <- sf::st_coordinates(x.iter_geom[[i]])
idx <- unique(pts[, colnames(pts) %in% c("L1", "L2", "L3")])

for (k in seq_len(nrow(idx))) {
newpts <- pts[pts[, "L1"] == idx[k, "L1"] & pts[, "L2"] == idx[k, "L2"], c("X", "Y")]

distances <- apply(centroids, 1, function(pt) {
ptm <- matrix(pt, nrow=nrow(newpts), ncol=2, byrow=T)
sqrt(rowSums((newpts - ptm)^2))
})

#dold <- spDists(newpts, centroids)
#all.equal(distances, dold)

#distance
for (j in seq_len(nrow(centroids))) {
distance <- distances[, j]

# calculate force vector
Fij <- mass[j] * radius[j] / distance
Fbij <- mass[j] * (distance / radius[j]) ^ 2 * (4 - 3 * (distance / radius[j]))
Fij[distance <= radius[j]] <- Fbij[distance <= radius[j]]
Fij <- Fij * forceReductionFactor / distance

# calculate new border coordinates
newpts <- newpts + cbind(X1 = Fij, X2 = Fij) * (newpts - centroids[rep(j, nrow(newpts)), ])
}

# save final coordinates from this iteration to coordinate list
if (sf::st_geometry_type(sf::st_geometry(x.iter)[[i]]) == "POLYGON"){
sf::st_geometry(x.iter)[[i]][[idx[k, "L1"]]] <- newpts
} else {
sf::st_geometry(x.iter)[[i]][[idx[k, "L2"]]][[idx[k, "L1"]]] <- newpts
}
# Process polygons either in parallel or sequentially
if (multithreadded) {
if (show_progress) {
cartogram_assert_package("progressr")
progressr::handlers(global = TRUE)
progressr::handlers("progress")
p <- progressr::progressor(along = seq_len(nrow(x.iter)))
} else {
p <- function(...) NULL
}

x.iter_geom <- future.apply::future_lapply(
seq_len(nrow(x.iter)),
function(i) {
if (show_progress) p(sprintf("Processing polygon %d in iteration %d", i, z))
process_polygon(x.iter_geom[[i]], centroids, mass, radius, forceReductionFactor)
},
future.seed = TRUE
)
} else {
if (show_progress) {
pb <- utils::txtProgressBar(min = 0, max = nrow(x.iter), style = 3)
}

x.iter_geom <- lapply(
seq_len(nrow(x.iter)),
function(i) {
if (show_progress) utils::setTxtProgressBar(pb, i)
process_polygon(x.iter_geom[[i]], centroids, mass, radius, forceReductionFactor)
}
)

if (show_progress) close(pb)
}

sf::st_geometry(x.iter) <- do.call(sf::st_sfc, x.iter_geom)
}

# return and try to fix self-intersections
return(sf::st_buffer(x.iter, 0))
}

#' @keywords internal
process_polygon <- function(poly_geom, centroids, mass, radius, forceReductionFactor) {
pts <- sf::st_coordinates(poly_geom)
idx <- unique(pts[, colnames(pts) %in% c("L1", "L2", "L3")])

for (k in seq_len(nrow(idx))) {
newpts <- pts[pts[, "L1"] == idx[k, "L1"] & pts[, "L2"] == idx[k, "L2"], c("X", "Y")]

distances <- apply(centroids, 1, function(pt) {
ptm <- matrix(pt, nrow=nrow(newpts), ncol=2, byrow=T)
sqrt(rowSums((newpts - ptm)^2))
})

for (j in seq_len(nrow(centroids))) {
distance <- distances[, j]

Fij <- mass[j] * radius[j] / distance
Fbij <- mass[j] * (distance / radius[j]) ^ 2 * (4 - 3 * (distance / radius[j]))
Fij[distance <= radius[j]] <- Fbij[distance <= radius[j]]
Fij <- Fij * forceReductionFactor / distance

newpts <- newpts + cbind(X1 = Fij, X2 = Fij) * (newpts - centroids[rep(j, nrow(newpts)), ])
}

if (sf::st_geometry_type(poly_geom) == "POLYGON") {
poly_geom[[idx[k, "L1"]]] <- newpts
} else {
poly_geom[[idx[k, "L2"]]][[idx[k, "L1"]]] <- newpts
}
}
return(poly_geom)
}
Loading
Loading