mapsPERU is an R package that provides datasets containing the centroids and geographic boundaries of the regions, departments, provinces and districts of Peru.
Install mapsPERU from CRAN:
# Install the released version of mapsPERU from CRAN.
install.packages(pkgs = "mapsPERU")
Or install mapsPERU from GitHub using the remotes package:
# Attach remotes, then install mapsPERU from its GitHub repository.
library("remotes")
install_github(repo = "musajajorge/mapsPERU")
The datasets included in this package are:
- map_REG: Geographic information of the regions of Peru
- map_DEP: Geographic information of the departments of Peru
- map_PROV: Geographic information of the provinces of Peru
- map_DIST: Geographic information of the districts of Peru
- map_REG
Column | Type | Description |
---|---|---|
COD_REGION | chr | Region code |
REGION | chr | Region name |
coords_x | dbl | Longitude of the centroid of the region |
coords_y | dbl | Latitude of the centroid of the region |
geometry | MULTIPOLYGON | MULTIPOLYGON Geometric object |
Note: There is no official coding scheme for regions, only for departments. The codes 150100 (Metropolitan Lima) and 159900 (Lima Provinces) should therefore be treated as reference codes rather than official ones.
- map_DEP
Column | Type | Description |
---|---|---|
COD_DEPARTAMENTO | chr | Department code |
DEPARTAMENTO | chr | Department name |
coords_x | dbl | Longitude of the centroid of the department |
coords_y | dbl | Latitude of the centroid of the department |
geometry | MULTIPOLYGON | MULTIPOLYGON Geometric object |
- map_PROV
Column | Type | Description |
---|---|---|
COD_REGION | chr | Region code |
COD_DEPARTAMENTO | chr | Department code |
COD_PROVINCIA | chr | Province code |
REGION | chr | Region name |
DEPARTAMENTO | chr | Department name |
PROVINCIA | chr | Province name |
coords_x | dbl | Longitude of the centroid of the province |
coords_y | dbl | Latitude of the centroid of the province |
geometry | MULTIPOLYGON | MULTIPOLYGON Geometric object |
- map_DIST
Column | Type | Description |
---|---|---|
COD_REGION | chr | Region code |
COD_DEPARTAMENTO | chr | Department code |
COD_PROVINCIA | chr | Province code |
COD_DISTRITO | chr | District code |
REGION | chr | Region name |
DEPARTAMENTO | chr | Department name |
PROVINCIA | chr | Province name |
DISTRITO | chr | District name |
NOMBRE_CAPITAL_LEGAL | chr | Capital name |
REGION_NATURAL | chr | Natural region |
coords_x | dbl | Longitude of the centroid of the district |
coords_y | dbl | Latitude of the centroid of the district |
geometry | MULTIPOLYGON | MULTIPOLYGON Geometric object |
You do not need to install additional packages to use mapsPERU datasets; however, if you want to see the structure of each dataset with str() or dplyr::glimpse() it is advisable to run library(sf) beforehand.
# Attach the data package and the plotting stack up front.
library(mapsPERU)
library(ggplot2)
library(sf)

# Department-level dataset shipped with mapsPERU.
df <- map_DEP

# Draw every department polygon, filled by department name.
ggplot(data = df, mapping = aes(geometry = geometry)) +
  geom_sf(mapping = aes(fill = DEPARTAMENTO))
In this example we are using the name of the departments as a categorical variable in the graph. You can combine the mapsPERU datasets with other categorical or numeric variables that you want to plot.
Note that mapsPERU also provides geographic information of the centroids, so you can include the names of the departments as labels.
# Attach the data package and the plotting stack up front.
library(mapsPERU)
library(ggplot2)
library(sf)

# Department-level dataset; coords_x / coords_y hold each centroid.
df <- map_DEP

# Fill the polygons by department, then print each department name at its
# centroid and blank out both axis titles.
ggplot(data = df, mapping = aes(geometry = geometry)) +
  geom_sf(mapping = aes(fill = DEPARTAMENTO)) +
  geom_text(
    data = df,
    mapping = aes(
      x = coords_x,
      y = coords_y,
      group = NULL,
      label = DEPARTAMENTO
    ),
    size = 2.5
  ) +
  labs(x = "", y = "")
The centroids dataset not only provides the longitudes and latitudes of each region but also includes the geometry field, which is a multipolygon that will allow us to plot numerical variables on our map.
In this example, we are going to plot the average cost per student (ACPS) in university higher education in the year 2020.
# Plot the average cost per student (ACPS) for each region of Peru.
library(mapsPERU)
library(dplyr)
library(sf)
library(ggplot2)

# Region-level dataset shipped with mapsPERU.
df <- map_REG

# Region names, spelled as in map_REG$REGION so the join below can match them.
REGION <- c(
  'Amazonas', 'Áncash', 'Apurímac', 'Arequipa', 'Ayacucho', 'Cajamarca',
  'Callao', 'Cusco', 'Huancavelica', 'Huánuco', 'Ica', 'Junín',
  'La Libertad', 'Lambayeque', 'Lima Metropolitana', 'Lima Provincias',
  'Loreto', 'Madre de Dios', 'Moquegua', 'Pasco', 'Piura', 'Puno',
  'San Martín', 'Tacna', 'Tumbes', 'Ucayali'
)

# One ACPS value per region, in the same order as REGION.
ACPS <- c(
  12364, 7001, 8615, 10302, 5015, 8632, 7507, 7909, 6843, 8412, 6950, 7182,
  8363, 5941, 10595, 6742, 8250, 6888, 31287, 7630, 12647, 7282, 10512, 8017,
  11454, 6998
)

# Build the lookup table directly from the two vectors. Going through cbind()
# would coerce the numbers to character first; on R < 4.0 they would then
# become factors, and as.numeric() would return level codes, not the costs.
ACPS <- data.frame(REGION = REGION, ACPS = ACPS, stringsAsFactors = FALSE)

# Attach the ACPS value to each region polygon.
df <- left_join(df, ACPS, by = "REGION")

# Choropleth: low costs in blue, high costs in red.
ggplot(df, aes(geometry = geometry)) +
  geom_sf(aes(fill = ACPS)) +
  scale_fill_gradient(low = "mediumblue", high = "red3")
In this example we will show how the use of regional boundaries and centroids datasets facilitates the filtering of specific regions to be displayed on the map.
# Plot only Lima Metropolitana, Lima Provincias and Callao.
library(mapsPERU)
# Load sf before filtering so that sf's dplyr methods handle the sf object,
# matching the other examples, which attach sf before any dplyr verb.
library(sf)
library(ggplot2)

# Region-level dataset shipped with mapsPERU.
df <- map_REG

# Keep the three regions of interest; %in% replaces a chain of == tests.
df <- dplyr::filter(
  df,
  REGION %in% c("Lima Metropolitana", "Lima Provincias", "Callao")
)

# Fill each remaining region by name, label it at its centroid and blank out
# both axis titles.
ggplot(df, aes(geometry = geometry)) +
  geom_sf(aes(fill = REGION)) +
  geom_text(
    data = df,
    aes(x = coords_x, y = coords_y, group = NULL, label = REGION),
    size = 3
  ) +
  labs(x = "", y = "")
# Plot the provinces of Peru coloured by population bracket.
library(mapsPERU)
library(readxl)
library(dplyr)
library(sf)
library(ggplot2)

# Province-level dataset shipped with mapsPERU.
df <- map_PROV

# Download the population workbook into the working directory and read it.
url <- "https://zenodo.org/record/5646444/files/POBLACION_INEI_2021.xlsx?download=1"
destfile <- "POBLACION_INEI_2021.xlsx"
curl::curl_download(url, destfile)
pob <- read_excel(destfile)

# Total the Cantidad column per province code.
pob_prov <- pob %>%
  group_by(COD_PROVINCIA) %>%
  summarise(Población = sum(Cantidad))

# Attach the province totals to the province polygons.
df <- left_join(df, pob_prov, by = "COD_PROVINCIA")

# Bin the totals into left-closed intervals [lower, upper).
# right = FALSE is spelled out because F is an ordinary, reassignable variable.
df$Categoría <- cut(
  df$Población,
  right = FALSE,
  breaks = c(0, 100000, 500000, 1000000, Inf),
  labels = c(
    "Menos a 100 mil personas",
    "Menos de 500 mil personas",
    "Más de 500 mil personas",
    "Más de 1 millón de personas"
  )
)

# One fill colour per bracket, lightest to darkest.
colores <- c('#feebe2', '#fbb4b9', '#f768a1', '#ae017e')

ggplot(df, aes(geometry = geometry)) +
  scale_fill_manual(values = colores) +
  geom_sf(aes(fill = Categoría))
# Plot the districts of Peru coloured by population bracket.
library(mapsPERU)
library(readxl)
library(dplyr)
library(sf)
library(ggplot2)

# District-level dataset shipped with mapsPERU.
df <- map_DIST

# Download the population workbook into the working directory and read it.
url <- "https://zenodo.org/record/5646444/files/POBLACION_INEI_2021.xlsx?download=1"
destfile <- "POBLACION_INEI_2021.xlsx"
curl::curl_download(url, destfile)
pob <- read_excel(destfile)

# Total the Cantidad column per district code.
pob_dist <- pob %>%
  group_by(COD_DISTRITO) %>%
  summarise(Cantidad = sum(Cantidad))

# Attach the district totals to the district polygons.
df <- left_join(df, pob_dist, by = "COD_DISTRITO")

# Bracket labels in display order; "No disponible" marks districts that have
# no population figure after the join.
niveles <- c(
  "Menos de 1,000", "Menos de 5,000", "Menos de 10,000", "Menos de 20,000",
  "Más de 20,000", "No disponible"
)

# case_when() uses the first matching condition, so the missing-value case
# goes first and the thresholds follow in ascending order. This replaces the
# nested ifelse() chain and the separate `is.na(...) == T` patch-up pass.
df$Pob_Group <- case_when(
  is.na(df$Cantidad) ~ "No disponible",
  df$Cantidad < 1000 ~ "Menos de 1,000",
  df$Cantidad < 5000 ~ "Menos de 5,000",
  df$Cantidad < 10000 ~ "Menos de 10,000",
  df$Cantidad < 20000 ~ "Menos de 20,000",
  TRUE ~ "Más de 20,000"
)
df$Pob_Group <- factor(df$Pob_Group, levels = niveles)

# One fill colour per level of Pob_Group, in the same order as the levels.
colores <- c('#edf8fb', '#b3cde3', '#8c96c6', '#8856a7', '#810f7c', '#1C2833')

ggplot(df, aes(geometry = geometry)) +
  scale_fill_manual(values = colores) +
  geom_sf(aes(fill = Pob_Group))
# Plot the districts of Peru coloured by natural region.
library(mapsPERU)
library(sf)
library(ggplot2)

# District-level dataset shipped with mapsPERU.
df <- map_DIST

# Turn REGION_NATURAL into an ordered factor so the legend follows the
# Costa -> Sierra -> Selva order.
df$REGION_NATURAL <- factor(
  df$REGION_NATURAL,
  levels = c("Costa", "Sierra", "Selva"),
  ordered = TRUE
)

# One fill colour per natural region, in level order.
colores <- c('#F1C40F', '#D35400', '#229954')

ggplot(data = df, mapping = aes(geometry = geometry)) +
  scale_fill_manual(values = colores) +
  geom_sf(mapping = aes(fill = REGION_NATURAL)) +
  labs(fill = 'Región natural')