Skip to content

Commit

Permalink
Merge pull request #1 from IvanWilli/main
Browse files Browse the repository at this point in the history
updating fork...
  • Loading branch information
alburezg authored Jan 4, 2023
2 parents 1989489 + 76df243 commit 5ad8a47
Show file tree
Hide file tree
Showing 60 changed files with 2,410 additions and 997 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
.Ruserdata
inst/doc
diego.txt
.m
.mat
.csv
29 changes: 23 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,37 @@
Package: DemoKin
Title: Demokin
Description: Estimate kin counts for stable and non stable populations
Version: 0.0.0.9000
Description: Estimate population kin counts and its distribution by type and age
Version: 1.0.0
Authors@R: c(
person("Iván", "Williams", email = "act.ivanwilliams@gmail.com", role = "cre"),
person("Diego", "Alburez-Gutierrez", email = "alburezgutierrez@demogr.mpg.de", role = "aut"))
person("Diego", "Alburez-Gutierrez", email = "alburezgutierrez@demogr.mpg.de", role = "aut"),
person("Xi", "Song", email = "xisong@sas.upenn.edu", role = "ctb"))
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.2.1
Suggests:
knitr,
rmarkdown
rmarkdown,
testthat (>= 3.0.0),
ggplot2
VignetteBuilder: knitr
Imports: tidyverse, DiagrammeR, HMDHFDplus, devtools
Imports:
dplyr,
tidyr,
purrr,
forcats,
HMDHFDplus,
progress,
matrixcalc,
Matrix,
MASS,
stats,
igraph,
magrittr,
lifecycle
BugReports: https://github.com/IvanWilli/DemoKin/issues
Depends:
R (>= 2.10)
Config/testthat/edition: 3
Binary file added DemoKin-Logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 7 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(demokin_codes)
export(get_HMDHFD)
export(kin)
export(kin_non_stable)
export(kin_stable)
export(kin_multi_stage)
export(kin_time_invariant)
export(kin_time_variant)
export(plot_diagram)
importFrom(DiagrammeR,mermaid)
export(rename_kin)
importFrom(magrittr,"%>%")
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# DemoKin 1.0.0

* Added a `NEWS.md` file to track changes to the package.
* Change stable/non-stable references to time varying/non-varying rates.
* Add multi-state process.

49 changes: 49 additions & 0 deletions R/aux_funs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

#' print kin codes
#' @description Print kin codes and labels
#' @export
demokin_codes <- function(){
codes <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys")
caswell_codes <- c("t", "v", "a", "b", "c", "h", "g", "d", "p", "q", "r", "s", "m", "n")
labels <- c("Cousins from older aunt", "Cousins from younger aunt", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", "Nieces from younger sister", "Aunt older than mother", "Aunt younger than mother", "Older sister", "Younger sister")
data.frame(DemoKin = codes, Caswell = caswell_codes, Label = labels, row.names = NULL)
}

#' rename kin
#' @description Rename kin labels depending consolidate some types
#' @export
rename_kin <- function(df, consolidate_column = "no"){

stopifnot("Argument 'consolidate_column' should be 'no' or a valid column name" = consolidate_column %in% c("no", colnames(df)))

if(consolidate_column == "no"){

relatives <- c("Cousins from older aunt", "Cousins from younger aunt", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", "Nieces from younger sister", "Aunt older than mother", "Aunt younger than mother", "Older sister", "Younger sister")
names(relatives) <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys")

} else {

# Combine kin types irrespective of whether they come from older
# or younger sibling lines
consolidate_vec <- c("c", "c", "d", "gd", "ggd", "ggm", "gm", "m", "n", "n", "a", "a", "s", "s")
names(consolidate_vec) <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys")

# Rename kin types from codes to actual words
relatives <- c("Cousins", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces", "Aunt", "Sister")
names(relatives) <- unique(consolidate_vec)

df <- as.data.frame(df)
df$count <- df[ , consolidate_column]

df <-
df %>%
dplyr::mutate(kin = consolidate_vec[kin]) %>%
dplyr::group_by(age_focal, kin) %>%
dplyr::summarise(count = sum(count)) %>%
dplyr::ungroup()


}
df$kin <- relatives[df$kin]
df
}
66 changes: 65 additions & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,16 @@
#'
#' @source
#' HMD/HFD
"swe_surv"
"swe_Sx"

#' Female swedish survival probabilities from 1900 to 2015
#' @docType data
#' @format
#' A matrix with years as cols and ages (0 to 100 as OAG) as rows.
#'
#' @source
#' HMD/HFD
"swe_px"

#' Swedish age-specific fertility rates from 1900 to 2015
#'
Expand Down Expand Up @@ -63,3 +72,58 @@
#' @source
#' Caswell (2019)
"pi_caswell_2021"

#' Female Slovakian survival probabilities by parity stage in 1990 (Caswell, 2021)
#'
#' Female Slovakian survival probabilities by parity stage in 1990 (Caswell, 2021)
#' @docType data
#' @format
#' A matrix of px with stages as cols and ages as rows.
#'
#' @source
#' Caswell (2021)
"svk_pxs"

#' Female Slovakian fertility rates by parity stage in 1990 (Caswell, 2021)
#'
#' Female Slovakian fertility rates by parity stage in 1990 (Caswell, 2021)
#' @docType data
#' @format
#' A matrix of fx with stages as cols and ages as rows.
#'
#' @source
#' Caswell (2021)
"svk_fxs"

#' Age where assign offspring of individuals in each partity stage (Caswell, 2021). All to zero age in this case.
#'
#' Age where assign offspring of individuals in each partity stage (Caswell, 2021). All to zero age in this case.
#' @docType data
#' @format
#' A matrix of ones in ages where assign offspring individuals, with stages as cols and ages as rows.
#'
#' @source
#' Caswell (2021)
"svk_Hxs"

#' Probability of transition among parity stage for Slovakia in 1990, for each age, conditional on survival (Caswell, 2021).
#'
#' Probability of transition among parity stage for Slovakia in 1990, for each age, conditional on survival (Caswell, 2021).
#' @docType data
#' @format
#' A list of column-stochastic matrix with probabilities of transition among parity stage, for each age, conditional on survival.
#'
#' @source
#' Caswell (2021)
"svk_Uxs"

#' Output for Slovakia 1990 in Caswell (2020).
#'
#' Output for Slovakia 1990 in Caswell (2020).
#' @docType data
#' @format
#' A list with specific kin types age-stage matrix
#'
#' @source
#' Caswell (2021)
"kin_svk1990_caswell2020"
138 changes: 87 additions & 51 deletions R/get_HMDHFD.R
Original file line number Diff line number Diff line change
@@ -1,89 +1,125 @@
#' Get time serie matrix data from HMD/HFD

#' @description Wrapper function to get data.frames of survival, fertlity and population
#' of selected countriy on selected period.
#' @description Wrapper function to get data of female survival, fertlity and population
#' of selected country on selected period.

#' @param country numeric. Country code from rom HMD/HFD.
#' @param max_year numeric. Latest year to get data.
#' @param min_year integer. Older year to get data.
#' @param user character. From HMD/HFD.
#' @param pass character. From HMD/HFD.
#'
#' @return A list wiith survival, fertility and poopulation age specific matrixes, with calendar year as colnames.
#' @param user_HMD character. From HMD.
#' @param user_HFD character. From HFD.
#' @param pass_HMD character. From HMD.
#' @param pass_HFD character. From HFD.
#' @param OAG numeric. Open age group to standarize output.
#' @return A list wiith female survival probability, survival function, fertility and poopulation age specific matrixes, with calendar year as colnames.
#' @export

get_HMDHFD <- function(country = "SWE",
min_year = 1900,
max_year = 2018,
user = NULL,
pass = NULL){
user_HMD = NULL,
pass_HMD = NULL,
user_HFD = NULL,
pass_HFD = NULL,
OAG = 100){

# source HMD HFD - use SWE now-----------------------------------------------------------------
pop <- readHMDweb(CNTRY = country, "Population", user, pass, fixup = TRUE) %>%
select(Year, Age, N = Female1)%>%
filter(Year >= min_year, Year <= max_year)
lt <- readHMDweb(country, "fltper_1x1", user, pass, fixup = TRUE) %>%
filter(Year >= min_year, Year <= max_year)
asfr <- readHFDweb(country, "asfrRR", user, "52962", fixup = TRUE)%>%
filter(Year >= min_year, Year <= max_year)
if(any(c(is.null(user_HMD), is.null(user_HFD), is.null(pass_HMD), is.null(pass_HFD)))){
stop("The function needs HMD and HMF access.")
}

# source HMD HFD -----------------------------------------------------------------
pop <- HMDHFDplus::readHMDweb(CNTRY = country, "Population", user_HMD, pass_HMD, fixup = TRUE) %>%
dplyr::select(Year, Age, N = Female1)%>%
dplyr::filter(Year >= min_year, Year <= max_year)
lt <- HMDHFDplus::readHMDweb(country, "fltper_1x1", user_HMD, pass_HMD, fixup = TRUE) %>%
dplyr::filter(Year >= min_year, Year <= max_year)
asfr <- HMDHFDplus::readHFDweb(country, "asfrRR", user_HFD, pass_HFD, fixup = TRUE)%>%
dplyr::filter(Year >= min_year, Year <= max_year)

# list of yearly Leslie matrix ---------------------------------------------------

age = 0:100
age = 0:OAG
ages = length(age)
w = last(age)
last_year = max(lt$Year)
years = min_year:last_year

# survival probabilities
L <- lt %>%
filter(Age<=w) %>%
mutate(Lx = ifelse(Age==w, Tx, Lx)) %>%
select(Year, Age, Lx) %>%
spread(Year, Lx) %>%
select(-Age)
# survival probability
px <- lt %>%
dplyr::filter(Age<=OAG) %>%
dplyr::mutate(px = 1 - qx,
px = ifelse(Age==OAG, 0, px)) %>%
dplyr::select(Year, Age, px) %>%
tidyr::pivot_wider(names_from = "Year", values_from = "px") %>%
dplyr::select(-Age) %>%
as.matrix()
rownames(px) = 0:OAG

P <- rbind(L[c(-1,-101),]/L[-c(100:101),],
L[101,]/(L[100,]+L[101,]),
L[101,]/(L[100,]+L[101,]))
rownames(P) = 0:100
# survival function
Lx <- lt %>%
dplyr::filter(Age<=OAG) %>%
dplyr::mutate(Lx = ifelse(Age==OAG, Tx, Lx)) %>%
dplyr::select(Year, Age, Lx) %>%
tidyr::pivot_wider(names_from = "Year", values_from = "Lx") %>%
dplyr::select(-Age) %>%
as.matrix()

Sx <- rbind(Lx[c(-1,-ages),]/Lx[-c(w:ages),],
Lx[ages,]/(Lx[w,]+Lx[ages,]),
Lx[ages,]/(Lx[w,]+Lx[ages,]))
rownames(Sx) = 0:w

# fertility
asfr <- asfr %>%
filter(Year >= min_year) %>%
select(-OpenInterval) %>%
fx <- asfr %>%
dplyr::filter(Year >= min_year) %>%
dplyr::select(-OpenInterval) %>%
rbind(
expand.grid(Year = years,
Age = c(0:11,56:100),
Age = c(0:(min(asfr$Age)-1),(max(asfr$Age)+1):OAG),
ASFR = 0)) %>%
arrange(Year, Age) %>%
spread(Year, ASFR) %>%
select(-Age)
rownames(asfr) = 0:100
dplyr::arrange(Year, Age) %>%
tidyr::spread(Year, ASFR) %>%
dplyr::select(-Age) %>%
as.matrix()
rownames(fx) = 0:OAG

# population
N <- pop %>%
mutate(Age = ifelse(Age>100, 100, Age)) %>%
group_by(Year, Age) %>% summarise(N = sum(N)) %>%
filter(Age<=100, Year >= min_year) %>%
arrange(Year, Age) %>%
spread(Year, N) %>%
select(-Age)
rownames(N) = 0:100
Nx <- pop %>%
dplyr::mutate(Age = ifelse(Age>OAG, OAG, Age)) %>%
dplyr::group_by(Year, Age) %>% summarise(N = sum(N)) %>%
dplyr::filter(Age<=OAG, Year >= min_year) %>%
dplyr::arrange(Year, Age) %>%
tidyr::spread(Year, N) %>%
dplyr::select(-Age) %>%
as.matrix()
rownames(Nx) = 0:OAG

# only return data with values
if(any(is.na(colSums(P)))){
if(any(is.na(colSums(Sx)))){
warning("Asked for data out of HMDHFD range")
P <- P[,!is.na(colSums(P))]
Sx <- Sx[,!is.na(colSums(Sx))]
}
if(any(is.na(colSums(asfr)))){
if(any(is.na(colSums(fx)))){
warning("Asked for data out of HMDHFD range")
asfr <- asfr[,!is.na(colSums(asfr))]
fx <- fx[,!is.na(colSums(fx))]
}
if(any(is.na(colSums(N)))){
if(any(is.na(colSums(Nx)))){
warning("Asked for data out of HMDHFD range")
N <- N[,!is.na(colSums(N))]
Nx <- Nx[,!is.na(colSums(Nx))]
}

return(list(P=P, asfr=asfr, N=N))
return(list(px=px,
Sx=Sx,
fx=fx,
Nx=Nx))
}

# save data
# swe_px <- swe_data$px
# swe_Sx <- swe_data$Sx
# swe_asfr <-swe_data$fx
# swe_pop <- swe_data$Nx
# save(swe_px, file = "data/swe_px.rda")
# save(swe_Sx, file = "data/swe_Sx.rda")
# save(swe_asfr, file = "data/swe_asfr.rda")
# save(swe_pop, file = "data/swe_pop.rda")
Loading

0 comments on commit 5ad8a47

Please sign in to comment.