Merge pull request #1 from IvanWilli/main

updating fork...
IvanWilli · Jan 4, 2023 · 5ad8a47 · 5ad8a47
2 parents 1989489 + 76df243
commit 5ad8a47
Show file tree

Hide file tree

Showing 60 changed files with 2,410 additions and 997 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,6 @@
 .Ruserdata
 inst/doc
 diego.txt
+.m
+.mat
+.csv
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,20 +1,37 @@
 Package: DemoKin
 Title: Demokin
-Description: Estimate kin counts for stable and non stable populations
-Version: 0.0.0.9000
+Description: Estimate population kin counts and its distribution by type and age
+Version: 1.0.0
 Authors@R: c(
     person("Iván", "Williams", email = "act.ivanwilliams@gmail.com", role = "cre"),
-    person("Diego", "Alburez-Gutierrez", email = "alburezgutierrez@demogr.mpg.de", role = "aut"))
+    person("Diego", "Alburez-Gutierrez", email = "alburezgutierrez@demogr.mpg.de", role = "aut"),
+    person("Xi", "Song", email = "xisong@sas.upenn.edu", role = "ctb"))
 License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.1
 Suggests: 
     knitr,
-    rmarkdown
+    rmarkdown,
+    testthat (>= 3.0.0),
+    ggplot2
 VignetteBuilder: knitr
-Imports: tidyverse, DiagrammeR, HMDHFDplus, devtools
+Imports: 
+    dplyr,
+    tidyr,
+    purrr,
+    forcats,
+    HMDHFDplus,
+    progress,
+    matrixcalc,
+    Matrix,
+    MASS,
+    stats,
+    igraph,
+    magrittr,
+    lifecycle
 BugReports: https://github.com/IvanWilli/DemoKin/issues
 Depends: 
     R (>= 2.10)
+Config/testthat/edition: 3
diff --git a/DemoKin-Logo.png b/DemoKin-Logo.png
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,8 +1,12 @@
 # Generated by roxygen2: do not edit by hand
 
+export("%>%")
+export(demokin_codes)
 export(get_HMDHFD)
 export(kin)
-export(kin_non_stable)
-export(kin_stable)
+export(kin_multi_stage)
+export(kin_time_invariant)
+export(kin_time_variant)
 export(plot_diagram)
-importFrom(DiagrammeR,mermaid)
+export(rename_kin)
+importFrom(magrittr,"%>%")
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,6 @@
+# DemoKin 1.0.0
+
+* Added a `NEWS.md` file to track changes to the package.
+* Change stable/non-stable references to time varying/non-varying rates.
+* Add multi-state process.
+
diff --git a/R/aux_funs.R b/R/aux_funs.R
@@ -0,0 +1,49 @@
+
+#' print kin codes
+#' @description Print kin codes and labels
+#' @export
+demokin_codes <- function(){
+  codes <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys")
+  caswell_codes <- c("t", "v", "a", "b", "c", "h", "g", "d", "p", "q", "r", "s", "m", "n")
+  labels <- c("Cousins from older aunt", "Cousins from younger aunt", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", "Nieces from younger sister", "Aunt older than mother", "Aunt younger than mother", "Older sister", "Younger sister")
+  data.frame(DemoKin = codes, Caswell = caswell_codes, Label = labels, row.names = NULL)
+}
+
+#' rename kin
+#' @description Rename kin labels depending consolidate some types
+#' @export
+rename_kin <- function(df, consolidate_column = "no"){
+
+  stopifnot("Argument 'consolidate_column' should be 'no' or a valid column name" = consolidate_column %in% c("no", colnames(df)))
+
+  if(consolidate_column == "no"){
+
+    relatives <- c("Cousins from older aunt", "Cousins from younger aunt", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", "Nieces from younger sister", "Aunt older than mother", "Aunt younger than mother", "Older sister", "Younger sister")
+    names(relatives) <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys")
+
+  } else {
+
+    # Combine kin types irrespective of whether they come from older
+    # or younger sibling lines
+    consolidate_vec <- c("c", "c", "d", "gd", "ggd", "ggm", "gm", "m", "n", "n", "a", "a", "s", "s")
+    names(consolidate_vec) <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys")
+
+    # Rename kin types from codes to actual words
+    relatives <- c("Cousins", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces", "Aunt", "Sister")
+    names(relatives) <-  unique(consolidate_vec)
+
+    df <- as.data.frame(df)
+    df$count <- df[ , consolidate_column]
+
+    df <-
+      df %>%
+      dplyr::mutate(kin = consolidate_vec[kin]) %>%
+      dplyr::group_by(age_focal, kin) %>%
+      dplyr::summarise(count = sum(count)) %>%
+      dplyr::ungroup()
+
+
+  }
+  df$kin <- relatives[df$kin]
+  df
+}
diff --git a/R/data.R b/R/data.R
@@ -7,7 +7,16 @@
 #'
 #' @source
 #'   HMD/HFD
-"swe_surv"
+"swe_Sx"
+
+#' Female swedish survival probabilities from 1900 to 2015
+#' @docType data
+#' @format
+#'   A matrix with years as cols and ages (0 to 100 as OAG) as rows.
+#'
+#' @source
+#'   HMD/HFD
+"swe_px"
 
 #' Swedish age-specific fertility rates from 1900 to 2015
 #'
@@ -63,3 +72,58 @@
 #' @source
 #'   Caswell (2019)
 "pi_caswell_2021"
+
+#' Female Slovakian survival probabilities by parity stage in 1990 (Caswell, 2021)
+#'
+#' Female Slovakian survival probabilities by parity stage in 1990 (Caswell, 2021)
+#' @docType data
+#' @format
+#'   A matrix of px with stages as cols and ages as rows.
+#'
+#' @source
+#'   Caswell (2021)
+"svk_pxs"
+
+#' Female Slovakian fertility rates by parity stage in 1990 (Caswell, 2021)
+#'
+#' Female Slovakian fertility rates by parity stage in 1990 (Caswell, 2021)
+#' @docType data
+#' @format
+#'   A matrix of fx with stages as cols and ages as rows.
+#'
+#' @source
+#'   Caswell (2021)
+"svk_fxs"
+
+#' Age where assign offspring of individuals in each partity stage (Caswell, 2021). All to zero age in this case.
+#'
+#' Age where assign offspring of individuals in each partity stage (Caswell, 2021). All to zero age in this case.
+#' @docType data
+#' @format
+#'   A matrix of ones in ages where assign offspring individuals, with stages as cols and ages as rows.
+#'
+#' @source
+#'   Caswell (2021)
+"svk_Hxs"
+
+#' Probability of transition among parity stage for Slovakia in 1990, for each age, conditional on survival (Caswell, 2021).
+#'
+#' Probability of transition among parity stage for Slovakia in 1990, for each age, conditional on survival (Caswell, 2021).
+#' @docType data
+#' @format
+#' A list of column-stochastic matrix with probabilities of transition among parity stage, for each age, conditional on survival.
+#'
+#' @source
+#'   Caswell (2021)
+"svk_Uxs"
+
+#' Output for Slovakia 1990 in Caswell (2020).
+#'
+#' Output for Slovakia 1990 in Caswell (2020).
+#' @docType data
+#' @format
+#' A list with specific kin types age-stage matrix
+#'
+#' @source
+#'   Caswell (2021)
+"kin_svk1990_caswell2020"
diff --git a/R/get_HMDHFD.R b/R/get_HMDHFD.R
@@ -1,89 +1,125 @@
 #' Get time serie matrix data from HMD/HFD
 
-#' @description Wrapper function to get data.frames of survival, fertlity and population
-#' of selected countriy on selected period.
+#' @description Wrapper function to get data of female survival, fertlity and population
+#' of selected country on selected period.
 
 #' @param country numeric. Country code from rom HMD/HFD.
 #' @param max_year numeric. Latest year to get data.
 #' @param min_year integer. Older year to get data.
-#' @param user character. From HMD/HFD.
-#' @param pass character. From HMD/HFD.
-#'
-#' @return A list wiith survival, fertility and poopulation age specific matrixes, with calendar year as colnames.
+#' @param user_HMD character. From HMD.
+#' @param user_HFD character. From HFD.
+#' @param pass_HMD character. From HMD.
+#' @param pass_HFD character. From HFD.
+#' @param OAG numeric. Open age group to standarize output.
+#' @return A list wiith female survival probability, survival function, fertility and poopulation age specific matrixes, with calendar year as colnames.
 #' @export
 
 get_HMDHFD <- function(country = "SWE",
                          min_year = 1900,
                          max_year = 2018,
-                         user = NULL,
-                         pass = NULL){
+                         user_HMD = NULL,
+                         pass_HMD = NULL,
+                         user_HFD = NULL,
+                         pass_HFD = NULL,
+                         OAG = 100){
 
-  # source HMD HFD - use SWE now-----------------------------------------------------------------
-  pop <- readHMDweb(CNTRY = country, "Population", user, pass, fixup = TRUE) %>%
-          select(Year, Age, N = Female1)%>%
-          filter(Year >= min_year, Year <= max_year)
-  lt <- readHMDweb(country, "fltper_1x1", user, pass, fixup = TRUE) %>%
-          filter(Year >= min_year, Year <= max_year)
-  asfr <- readHFDweb(country, "asfrRR", user, "52962", fixup = TRUE)%>%
-          filter(Year >= min_year, Year <= max_year)
+  if(any(c(is.null(user_HMD), is.null(user_HFD), is.null(pass_HMD), is.null(pass_HFD)))){
+    stop("The function needs HMD and HMF access.")
+  }
+
+  # source HMD HFD -----------------------------------------------------------------
+  pop <- HMDHFDplus::readHMDweb(CNTRY = country, "Population", user_HMD, pass_HMD, fixup = TRUE) %>%
+          dplyr::select(Year, Age, N = Female1)%>%
+          dplyr::filter(Year >= min_year, Year <= max_year)
+  lt <- HMDHFDplus::readHMDweb(country, "fltper_1x1", user_HMD, pass_HMD, fixup = TRUE) %>%
+          dplyr::filter(Year >= min_year, Year <= max_year)
+  asfr <- HMDHFDplus::readHFDweb(country, "asfrRR", user_HFD, pass_HFD, fixup = TRUE)%>%
+          dplyr::filter(Year >= min_year, Year <= max_year)
 
   # list of yearly Leslie matrix ---------------------------------------------------
 
-  age = 0:100
+  age = 0:OAG
   ages = length(age)
   w = last(age)
   last_year = max(lt$Year)
   years = min_year:last_year
 
-  # survival probabilities
-  L <- lt %>%
-    filter(Age<=w) %>%
-    mutate(Lx = ifelse(Age==w, Tx, Lx)) %>%
-    select(Year, Age, Lx) %>%
-    spread(Year, Lx) %>%
-    select(-Age)
+  # survival probability
+  px <- lt %>%
+    dplyr::filter(Age<=OAG) %>%
+    dplyr::mutate(px = 1 - qx,
+           px = ifelse(Age==OAG, 0, px)) %>%
+    dplyr::select(Year, Age, px) %>%
+    tidyr::pivot_wider(names_from = "Year", values_from = "px") %>%
+    dplyr::select(-Age) %>%
+    as.matrix()
+  rownames(px) = 0:OAG
 
-  P <- rbind(L[c(-1,-101),]/L[-c(100:101),],
-             L[101,]/(L[100,]+L[101,]),
-             L[101,]/(L[100,]+L[101,]))
-  rownames(P) = 0:100
+  # survival function
+  Lx <- lt %>%
+    dplyr::filter(Age<=OAG) %>%
+    dplyr::mutate(Lx = ifelse(Age==OAG, Tx, Lx)) %>%
+    dplyr::select(Year, Age, Lx) %>%
+    tidyr::pivot_wider(names_from = "Year", values_from = "Lx") %>%
+    dplyr::select(-Age) %>%
+    as.matrix()
+
+  Sx <- rbind(Lx[c(-1,-ages),]/Lx[-c(w:ages),],
+             Lx[ages,]/(Lx[w,]+Lx[ages,]),
+             Lx[ages,]/(Lx[w,]+Lx[ages,]))
+  rownames(Sx) = 0:w
 
   # fertility
-  asfr <- asfr %>%
-    filter(Year >= min_year) %>%
-    select(-OpenInterval) %>%
+  fx <- asfr %>%
+    dplyr::filter(Year >= min_year) %>%
+    dplyr::select(-OpenInterval) %>%
     rbind(
       expand.grid(Year = years,
-                  Age = c(0:11,56:100),
+                  Age = c(0:(min(asfr$Age)-1),(max(asfr$Age)+1):OAG),
                   ASFR = 0)) %>%
-    arrange(Year, Age) %>%
-    spread(Year, ASFR) %>%
-    select(-Age)
-  rownames(asfr) = 0:100
+    dplyr::arrange(Year, Age) %>%
+    tidyr::spread(Year, ASFR) %>%
+    dplyr::select(-Age) %>%
+    as.matrix()
+  rownames(fx) = 0:OAG
 
   # population
-  N <- pop %>%
-    mutate(Age = ifelse(Age>100, 100, Age)) %>%
-    group_by(Year, Age) %>% summarise(N = sum(N)) %>%
-    filter(Age<=100, Year >= min_year) %>%
-    arrange(Year, Age) %>%
-    spread(Year, N) %>%
-    select(-Age)
-  rownames(N) = 0:100
+  Nx <- pop %>%
+    dplyr::mutate(Age = ifelse(Age>OAG, OAG, Age)) %>%
+    dplyr::group_by(Year, Age) %>% summarise(N = sum(N)) %>%
+    dplyr::filter(Age<=OAG, Year >= min_year) %>%
+    dplyr::arrange(Year, Age) %>%
+    tidyr::spread(Year, N) %>%
+    dplyr::select(-Age) %>%
+    as.matrix()
+  rownames(Nx) = 0:OAG
 
   # only return data with values
-  if(any(is.na(colSums(P)))){
+  if(any(is.na(colSums(Sx)))){
     warning("Asked for data out of HMDHFD range")
-    P <- P[,!is.na(colSums(P))]
+    Sx <- Sx[,!is.na(colSums(Sx))]
   }
-  if(any(is.na(colSums(asfr)))){
+  if(any(is.na(colSums(fx)))){
     warning("Asked for data out of HMDHFD range")
-    asfr <- asfr[,!is.na(colSums(asfr))]
+    fx <- fx[,!is.na(colSums(fx))]
   }
-  if(any(is.na(colSums(N)))){
+  if(any(is.na(colSums(Nx)))){
     warning("Asked for data out of HMDHFD range")
-    N <- N[,!is.na(colSums(N))]
+    Nx <- Nx[,!is.na(colSums(Nx))]
   }
 
-  return(list(P=P, asfr=asfr, N=N))
+  return(list(px=px,
+              Sx=Sx,
+              fx=fx,
+              Nx=Nx))
 }
+
+# save data
+  # swe_px <- swe_data$px
+  # swe_Sx <- swe_data$Sx
+  # swe_asfr <-swe_data$fx
+  # swe_pop <- swe_data$Nx
+  # save(swe_px, file = "data/swe_px.rda")
+  # save(swe_Sx, file = "data/swe_Sx.rda")
+  # save(swe_asfr, file = "data/swe_asfr.rda")
+  # save(swe_pop, file = "data/swe_pop.rda")