Skip to content

Commit

Permalink
update w/ local
Browse files Browse the repository at this point in the history
  • Loading branch information
cake-oh committed Sep 30, 2024
1 parent dedc509 commit 0487352
Show file tree
Hide file tree
Showing 32 changed files with 1,124,971 additions and 1 deletion.
Binary file modified .DS_Store
Binary file not shown.
13 changes: 13 additions & 0 deletions artis.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
Binary file added code/.DS_Store
Binary file not shown.
125 changes: 125 additions & 0 deletions code/0_prep_tsunami_elnino_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#### goal: process + visualize other variables ####
## el niño data (oni); tsunami; SDGs


#### [0] prep ####
library(tidyverse)

## load data
# external variables
oni_og <- read_csv("data/oni.csv")
tsunami_og <- read_tsv("data/tsunamis-2024-07-30_09-52-34_-0600.tsv")
sdg2_og <- read_csv("data/Goal2.csv")
sdg3_og <- read_csv("data/Goal3.csv")


#### [1] process + plot oni ####
oni_avg <- oni_og %>% mutate(annual_mean = rowMeans(select(., 2:13), na.rm = TRUE))

p_oni <- ggplot(oni_avg,aes(Year,annual_mean)) + geom_line() +
scale_x_continuous(breaks = seq(1996, 2022, by = 2), limits = c(1996, 2022)) +
labs(title = "Oceanic Niño Index", x = "Year", y = "Annual Mean") +
geom_hline(yintercept = c(-0.5, 0.5), linetype = "dashed", color = c("blue","red")) +
theme_minimal() + theme(plot.title = element_text(hjust = 0.5))
plot(p_oni)
# ggsave("output/oni_plot.jpg")

# write.csv(oni_avg,"output/oni_avg.csv",row.names = FALSE)


#### [2] process + plot tsunami data ####
# clean, make node list
tsunami_clean <- tsunami_og %>% select(-1) %>% slice(-1)
tsunami_nodes <- tsunami_clean %>% # grab relevant variables, convert country names to iso
select(Country,Year,`Maximum Water Height (m)`,
`Earthquake Magnitude`,Latitude,Longitude) %>%
arrange(Country,Year) %>%
mutate(COUNTRY = countrycode::countrycode(Country, "country.name", "iso3c"),
COUNTRY = ifelse(Country == "KERMADEC ISLANDS", "NZL", COUNTRY))

## plot n
p_tsunami <- tsunami_nodes %>%
count(COUNTRY) %>%
ggplot(aes(x = reorder(COUNTRY, n), y = n)) +
geom_bar(stat = "identity") + coord_flip() +
labs(title = "Tsunamis per Country 2000-2022",
x = "Country",
y = "n") + theme_minimal()
plot(p_tsunami)

# plot average intensity
average_water_height <- tsunami_nodes %>%
group_by(COUNTRY) %>%
summarise(average_max_water_height = mean(`Maximum Water Height (m)`, na.rm = TRUE))

# Create a bar chart of the average maximum water height per country
p_water_height <- ggplot(average_water_height, aes(x = reorder(COUNTRY, average_max_water_height), y = average_max_water_height)) +
geom_bar(stat = "identity") + coord_flip() +
labs(title = "Tsunami Height 2000-2022",
x = "Country",
y = "Average Maximum Water Height (m)") + theme_minimal()
plot(p_water_height)

# write.csv(tsunami_nodes,"output/tsunami_nodes.csv",row.names = FALSE)




#### [3] process + plot SDG data ####
## process
nut_net <- read_csv("output/nut_pict_hs160414.csv")
country_list <- unique(c(nut_net$importer_iso3c,nut_net$exporter_iso3c))

# sdg 2
sdg2.1 <- sdg2_og %>%
filter(SeriesDescription == "Prevalence of undernourishment (%)" & TimePeriod >= 2000 & TimePeriod <= 2020) %>%
select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
Value = as.numeric(Value)) %>%
filter(COUNTRY %in% country_list) %>%
filter(!is.na(Value)) %>%
mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value)),
Unit="Percentage",SeriesDescription="Prevalence of undernourishment (percentage)") %>%
arrange(COUNTRY, TimePeriod)


# sdg 3
sdg3.1 <- sdg3_og %>%
filter(SeriesDescription == "Mortality rate attributed to cardiovascular disease, cancer, diabetes or chronic respiratory disease (probability)",
TimePeriod >= 2000 & TimePeriod <= 2020) %>%
select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
Value = as.numeric(Value)) %>%
filter(COUNTRY %in% country_list) %>%
filter(!is.na(Value)) %>%
mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value))) %>%
arrange(COUNTRY, TimePeriod) %>% # Ensure the data is ordered by COUNTRY and TimePeriod
group_by(COUNTRY,TimePeriod) %>%
mutate(Value = mean(Value),
Unit="Probability",
SeriesDescription="Non-communicable disease mortality (probability)") %>% distinct()


# combine all SDGs
sdg_full <- bind_rows(sdg2.1,sdg3.1)



## plot
p_sdg <- ggplot(sdg_full,aes(TimePeriod,Value,group=COUNTRY,color=COUNTRY)) +
geom_path() + theme_minimal()+
labs(title="Sustainable Development Goal Metrics: Food Security and Health",x="Year",y=NULL)+
scale_x_continuous(limits=c(2000,2020),breaks=seq(2000,2020,2),expand=c(0,.1))+
scale_y_continuous(expand=c(0,0))+
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) +
facet_wrap(~SeriesDescription,scales = "free_y")
plot(p_sdg)
# ggsave("output/sdg_plots.jpg")






68 changes: 68 additions & 0 deletions code/1_merge_nutrients.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#### goal: merge together trade + nutrient data ####

#### [0] prep ####
library(tidyverse)

# load data
artis_og <- read_csv("data/artis_request_nomura.csv") # trade flows
afcd_og <- read_csv("data/nutrient_updated.csv") # nutrient contents per 100g; new data from Jessica Aug 1
# afcd_og <- read_csv("data/nutrient_metadata.csv") #

#### [1] join data ####
## join trade and nutrient data
join_df <- left_join(artis_og,afcd_og)

# clean?
join_df <- join_df %>% filter(dom_source!="error",
method=="capture",
habitat=="marine")

## convert nutrient data to nutrients per tonne - ANNUAL
tonne_to_100g <- 1e4 # 100 grams to tonne

pertonne_annual_df <- join_df %>% # nutrient per tonne
mutate(protein_g_pert = protein_g * tonne_to_100g,
fattyacids_g_pert = fattyacids_g * tonne_to_100g,
calcium_mg_pert = calcium_mg * tonne_to_100g,
zinc_mg_pert = zinc_mg * tonne_to_100g,
iron_mg_pert = iron_mg * tonne_to_100g,
vitamina_mcg_pert = vitamina_mcg * tonne_to_100g,
vitaminb12_mcg_pert = vitaminb12_mcg * tonne_to_100g)

converted_annual_df <- pertonne_annual_df %>% # total nutrients
mutate(protein_total_g = protein_g_pert * product_weight_t,
fattyacids_total_g = fattyacids_g_pert * product_weight_t,
calcium_total_mg = calcium_mg_pert * product_weight_t,
zinc_total_mg = zinc_mg_pert * product_weight_t,
iron_total_mg = iron_mg_pert * product_weight_t,
vitamina_total_mcg = vitamina_mcg_pert * product_weight_t,
vitaminb12_total_mcg = vitaminb12_mcg_pert * product_weight_t)


## convert nutrient data to nutrients per tonne - AGGREGATE
pertonne_allyears_df <- join_df %>% # sum total volumes across years
group_by(importer_iso3c,exporter_iso3c,sciname) %>%
mutate(summed_weight_t = sum(product_weight_t)) %>% ungroup() %>%
select(-c(product_weight_t,live_weight_t,year)) %>% distinct() %>%
mutate(protein_g_pert = protein_g * tonne_to_100g, # convert for tonnes
fattyacids_g_pert = fattyacids_g * tonne_to_100g,
calcium_mg_pert = calcium_mg * tonne_to_100g,
zinc_mg_pert = zinc_mg * tonne_to_100g,
iron_mg_pert = iron_mg * tonne_to_100g,
vitamina_mcg_pert = vitamina_mcg * tonne_to_100g,
vitaminb12_mcg_pert = vitaminb12_mcg * tonne_to_100g)

converted_allyears_df <- pertonne_allyears_df %>% # convert to total nutrients
mutate(protein_total_g = protein_g_pert * summed_weight_t,
fattyacids_total_g = fattyacids_g_pert * summed_weight_t,
calcium_total_mg = calcium_mg_pert * summed_weight_t,
zinc_total_mg = zinc_mg_pert * summed_weight_t,
iron_total_mg = iron_mg_pert * summed_weight_t,
vitamina_total_mcg = vitamina_mcg_pert * summed_weight_t,
vitaminb12_total_mcg = vitaminb12_mcg_pert * summed_weight_t)


#### save output ####
# write.csv(converted_annual_df,"output/nutrient_flows_annual_df.csv",row.names = FALSE)
# write.csv(converted_allyears_df,"output/nutrient_flows_allyears_df.csv",row.names=FALSE)

92 changes: 92 additions & 0 deletions code/2_explore_nutrientflows.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#### goal: explore nutrient flows / ID major import species for PICTs ####
# previous: 1_merge_nutrients.R


#### [0] prep ####
library(tidyverse)
library(vegan) # for shannon's diversity index

nutrient_flows_allyears_og <- read_csv("output/nutrient_flows_allyears_df.csv")
nutrient_flows_annual_og <- read_csv("output/nutrient_flows_annual_df.csv")


#### [1] filter for PICTs and hs codes ####
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")
hs_codes <- 160414

# filter PICTs in importers or exporters (could also include source data) + hs codes
nut_pict_hs <- nutrient_flows_annual_og %>%
filter(importer_iso3c %in% picts |
exporter_iso3c %in% picts) %>%
filter(hs6==hs_codes)

# aggregate - sum total for each nutrient
nut_pict_hs_sum <- nut_pict_hs %>% group_by(importer_iso3c,exporter_iso3c,year) %>%
summarise(total_weight_t = sum(product_weight_t),
protein_sum=sum(protein_total_g),
fattyacids_sum=sum(fattyacids_total_g),
calcium_sum=sum(calcium_total_mg),
zinc_sum=sum(zinc_total_mg),
iron_sum=sum(iron_total_mg),
vita_sum=sum(vitamina_total_mcg),
vitb12_sum=sum(vitaminb12_total_mcg))

#### [2] calculate diversity metrics by normalized dietary contributions ####
## get dietary recommendations
## values from nih https://ods.od.nih.gov/HealthInformation/nutrientrecommendations.aspx
## values for fatty acids averaged for male adults over 4 years old from nih https://ods.od.nih.gov/factsheets/Omega3FattyAcids-HealthProfessional/#h2
dietrec_df <- data_frame(category=c("protein","fattyacids","calcium",
"zinc","iron","vita","vitb12"),
rec_intake_adults=c(50,1.4,1300,
11,18,900,2.4),
unit=c("g","g","mg",
"mg","mg","mcg","mcg"))
# write.csv(dietrec_df,"output/dietrec_df.csv",row.names = FALSE)

## normalize and calculate diversity
# extract nutrient columns ending with "_sum"
nutrient_columns <- grep("_sum$", names(nut_pict_hs_sum), value = TRUE)

# create dictionary for quick lookup of recommended intake values
rec_intake_dict <- setNames(dietrec_df$rec_intake_adults, dietrec_df$category)

# normalize each nutrient column
nut_pict_hs_div <- nut_pict_hs_sum

for (col in nutrient_columns) {
nutrient <- sub("_sum$", "", col)
if (nutrient %in% names(rec_intake_dict)) {
recommended_intake <- rec_intake_dict[[nutrient]]
new_col_name <- paste0(nutrient, "_norm")
nut_pict_hs_div[[new_col_name]] <- nut_pict_hs_div[[col]] / recommended_intake
}
}

# calculate diversity
normalized_columns <- grep("_norm$", names(nut_pict_hs_div), value = TRUE)

nut_pict_hs_div$shannon_diversity <- apply(nut_pict_hs_div[normalized_columns], 1,
function(x) diversity(x, index = "shannon"))
nut_pict_hs_div$norm_sum <- rowSums(nut_pict_hs_div[normalized_columns])


## aggregate year version - find overall top volumes
# nut_pict_all <- nutrient_flows_allyears_og %>%
# filter(importer_iso3c %in% picts |
# exporter_iso3c %in% picts)
# nut_pict_all_top <- nut_pict_all %>%
# select(-c(importer_iso3c,exporter_iso3c,source_country_iso3c)) %>%
# group_by(sciname) %>% #summarise(total_weight=sum(summed_weight_t)) %>%
# arrange(desc(total_weight)) %>%
# slice_head(n=10)

#### save output ####
# PICT names
# write.csv(picts,"output/pict_names.csv",row.names = FALSE)
# top species
# write.csv(nut_pict_all_top,"output/top_sciname.csv",row.names = FALSE)

# pruned annual data at hs level 160414
# write.csv(nut_pict_hs_div,"output/nut_pict_hs160414.csv",row.names = FALSE)


85 changes: 85 additions & 0 deletions code/4_plotsankey.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#### goal: make a sankey diagram with nutrient categories flowing pics <-> non-pics ####

#### [0] prep ####
library(tidyverse)
# library(networkD3)
library(ggalluvial)


nut_net_og <- read_csv("output/nut_pict_hs160414.csv")




#### [1] process - ggalluvial version #####
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")

# Add PICS/non-PICS IDs
net_class <- nut_net_og %>%
mutate(
ImporterClass = ifelse(importer_iso3c %in% picts, "PICs", "Non-\nPICs"),
ExporterClass = ifelse(exporter_iso3c %in% picts, "PICs", "Non-\nPICs")
)


# List of nutrient columns
nutrients <- c("protein_sum", "zinc_sum", "fattyacids_sum",
"iron_sum", "vita_sum","vitb12_sum","calcium_sum")

# Aggregate data across all years for each nutrient
net_agg <- net_class %>%
filter(!(ExporterClass == "PICs" & ImporterClass == "PICs")) %>%
group_by(ExporterClass, ImporterClass) %>%
summarise(across(all_of(nutrients), sum, na.rm = TRUE)) %>%
pivot_longer(cols = all_of(nutrients), names_to = "Nutrient", values_to = "Value") %>%
ungroup() %>%
mutate(Nutrient = str_replace(Nutrient, "_sum$", "")) %>%
mutate(Nutrient = str_to_title(Nutrient))

alluvial_data <- net_agg %>%
mutate(Flow = paste(ExporterClass, "to", ImporterClass)) %>%
group_by(Flow, Nutrient) %>%
summarise(Value = sum(Value)) %>%
separate(Flow, into = c("ExporterClass", "ImporterClass"), sep = " to ") %>%
ungroup() %>% group_by(ExporterClass,ImporterClass)%>%
mutate(Proportion = Value / sum(Value)) %>%
mutate(Nutrient = factor(Nutrient, levels = unique(Nutrient)),
ExporterClass = factor(ExporterClass, levels = unique(ExporterClass)),
ImporterClass = factor(ImporterClass, levels = unique(ImporterClass))) %>%
droplevels()

# Create labels for proportions
alluvial_data <- alluvial_data %>%
mutate(Label = paste0(round(Proportion * 100, 1), "%"))

exporter_labels <- alluvial_data %>%
group_by(ExporterClass) %>%
mutate(mid_y = cumsum(Value) + 1 * Value)


# Create the plot
ggplot(alluvial_data,
aes(axis1 = ExporterClass, axis2 = ImporterClass, y = Value, fill = Nutrient)) +
geom_alluvium(width = 1/12, knot.pos = 0.5) +
geom_stratum(aes(color = ExporterClass), fill="grey95",width = 1/12, show.legend = FALSE) +
# geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3, vjust = -0.5) +
geom_text(aes(label = Label,x=1.2), size = 5, color = "black", position = position_stack(vjust = .3),check_overlap = TRUE) +
scale_x_discrete(limits = c("ExporterClass", "ImporterClass"), expand = c(0.1, 0.1),
labels = c("ExporterClass" = "Exporter", "ImporterClass" = "Importer")) +
labs(title = "Nutrient Flows Between PICs and Non-PICs",x=NULL,
y = NULL, fill = "Nutrient") +
theme_minimal() +
theme(axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
legend.title = element_text(size=15),
legend.text = element_text(size=13),
axis.text.x = element_text(size = 15),
plot.title = element_text(hjust = 0.5,size=17)) +
guides(fill = guide_legend(title = "Nutrient"),color="none")

# ggsave("output/sankey_nutrients.jpg")
#



Loading

0 comments on commit 0487352

Please sign in to comment.