Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assam NobBS Onset date calculation #29

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions NobBS-Experiments/DataAggregation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# install.packages("tidyverse")
library(jsonlite)
library(tidyverse)

consolidate_all_data <- function( ) {
patient_data_1 <- data.frame(fromJSON("https://api.covid19india.org/raw_data1.json"))
patient_data_2 <- data.frame(fromJSON("https://api.covid19india.org/raw_data2.json"))

all_patient_information <- patient_data_1 %>% select(raw_data.detecteddistrict, raw_data.dateannounced)
all_patient_information <- rbind(all_patient_information, patient_data_2 %>% select(raw_data.detecteddistrict,
raw_data.dateannounced))

patient_data_3 <- data.frame(fromJSON("https://api.covid19india.org/raw_data3.json"))
patient_data_3_with_district <- patient_data_3[(patient_data_3$raw_data.detecteddistrict != ""), ]
patient_data_3_with_district$raw_data.numcases <- as.integer(patient_data_3_with_district$raw_data.numcases)
patient_data_3_positive_cases <- patient_data_3_with_district[(patient_data_3_with_district$raw_data.numcases > 0), ]
patient_data_3_positive_cases <- patient_data_3_positive_cases %>% select(raw_data.detecteddistrict,
raw_data.dateannounced, raw_data.numcases)

bloated_districts <- rep(patient_data_3_positive_cases$raw_data.detecteddistrict, patient_data_3_positive_cases$raw_data.numcases)
bloated_datedetected <- rep(patient_data_3_positive_cases$raw_data.dateannounced, patient_data_3_positive_cases$raw_data.numcases)
patient_data_3_formatted <- data.frame(bloated_districts, bloated_datedetected)
patient_data_3_formatted <- patient_data_3_formatted %>% rename(raw_data.detecteddistrict=bloated_districts, raw_data.dateannounced=bloated_datedetected)

all_patient_information <- rbind(all_patient_information, patient_data_3_formatted)
all_patient_information
}
19 changes: 19 additions & 0 deletions NobBS-Experiments/DistrictSimilarity.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
library(readxl)

get_all_district_population <- function() {
census_data <- read_excel('IndiaCensus.xlsx')
district_populations <- census_data[(census_data$Level == 'DISTRICT' & census_data$TRU =='Total'), ] %>%
select(Name, TOT_P)
district_populations
}

get_similar_districts_by_population <- function(choosen_district) {
district_populations = get_all_district_population()
actual_dist_pop = district_populations[(district_populations$Name == choosen_district), "TOT_P"]
actual_pop <- actual_dist_pop$TOT_P[1]
lower_limit <- (actual_pop*90)/100
upper_limit <- (actual_pop*100)/100
similar_districts <- district_populations[(district_populations$TOT_P >= lower_limit &
district_populations$TOT_P <= upper_limit), "Name"]
similar_districts$Name
}
96 changes: 96 additions & 0 deletions NobBS-Experiments/Experiment1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# install.packages(c("dplyr", "fGarch", "ggplot2", "jsonlite", "magrittr", "NobBS", "sos", "curl", "ggpubr"))
library(jsonlite)
library(magrittr)
library(sos)
library(dplyr)
library(fGarch)
library(NobBS)
library(ggplot2)
library(ggpubr)
source("DataAggregation.R")
source("DistrictSimilarity.R")

generate_nobbs_estimates <- function(district_name, all_districts_data) {
single_district_data <- all_districts_data[(district == district_name), ]
if(nrow(single_district_data) <= 30) {
print(paste(district_name, " has less than 30 cases"))
return(NULL)
}
print(district_name)
single_district_dates_alone <- single_district_data %>% select(onset_date, report_date)
names(single_district_dates_alone ) <- c("onset_week", "report_week")
nowcast_results <- NobBS(data=single_district_dates_alone, now=as.Date("2020-05-10"), units="1 day",
onset_date="onset_week", report_date="report_week")
print(nowcast_results$estimates)
nowcasts <- nowcast_results$estimates
nowcast_plot <- ggplot(nowcasts) + geom_line(aes(onset_date,estimate,col="Nowcast estimate"),linetype="longdash") +
geom_line(aes(onset_date,n.reported,col="Reported to date"),linetype="solid") +
scale_colour_manual(name="",values=c("indianred3","black"))+
theme_classic()+
geom_ribbon(fill="indianred3",aes(x = onset_date,ymin=nowcasts$lower,
ymax=nowcasts$upper),alpha=0.3)+
xlab("Case onset date") + ylab("Estimated cases") +
ggtitle(paste(district_name, " Nowcast"))

nowcast_plot
}

# Get raw patient data
raw_patient_df <- consolidate_all_data()

# Get names of all districts with patients
district_wise_data <- fromJSON("https://api.covid19india.org/state_district_wise.json")
districts_list = list()
for(p in district_wise_data) { districts_list[names(p$districtData)] <- FALSE}
districts_list[["Unknown"]] <- NULL
districts_df = data.frame(names(districts_list))
colnames(districts_df) <- c("district")

# cleaning up patient data to remove tourist related data and retaining only the columns needed
patient_data = merge(x=districts_df, y=raw_patient_df, by.x='district', by.y='raw_data.detecteddistrict')

#### Calculation of onset date based on announcement date
num_patients <- nrow(patient_data)
# Using Skewed Gaussian with a mean of 9 days and standard deviation 4 to get the incubation time
mu = 9
incubation_times <- floor(rsnorm(num_patients, mean=mu, sd=4, xi=-3))
# removing negative values by adding mean to them
incubation_times[incubation_times<0] <- incubation_times[incubation_times<0] + mu
# Using Uniform Random sample in the interval of [1-3] to get the testing time
testing_time <- floor(runif(num_patients, min=1, max=4))
delay <- incubation_times + testing_time
onset_date <- as.Date(patient_data$raw_data.dateannounced, format ="%d/%m/%Y") - delay

# Create dataframe for all districts
report_date <- as.Date(patient_data$raw_data.dateannounced, format="%d/%m/%Y")
district <- patient_data$district
all_districts_data <- data.frame(district, onset_date, report_date)
all_districts_reliable_data <- all_districts_data[(all_districts_data$onset_date >= as.Date('2020-04-01')), ]
# Examples in all_districts_data dataframe
# district onset_date report_date
# Alappuzha 2020-01-18 2020-02-02
# Thrissur 2020-01-19 2020-01-30
# Kasaragod 2020-01-20 2020-02-03
# Agra 2020-02-17 2020-03-04
# East Delhi 2020-02-18 2020-03-02

NirantK marked this conversation as resolved.
Show resolved Hide resolved

# Get the plots for the top infected districts
district_grouped_data <- group_by(all_districts_reliable_data, district)
district_patient_counts <- summarise(district_grouped_data, n())
names(district_patient_counts) <- c("district", "count")
top_infected_districts <- top_n(district_patient_counts, 3, count)$district
plots_list <- lapply(top_infected_districts, generate_nobbs_estimates, all_districts_reliable_data)
ggarrange(plotlist = plots_list)

# Top Assam districts
assam_districts <- list("Cachar", "Bongaigaon", "Kamrup Metropolitan", "Goalpara", "Karimganj", "Kokrajhar")
plots_list <- lapply(assam_districts, generate_nobbs_estimates, all_districts_data)
ggarrange(plotlist = plots_list)

# Analysis on districts similar to that of Assam.
assam_district <- "Karimganj"
similar_districts <- get_similar_districts_by_population(assam_district)
plots_list <- lapply(similar_districts, generate_nobbs_estimates, all_districts_data)
plots_list[sapply(plots_list, is.null)] <- NULL
ggarrange(plotlist = plots_list)