update w/ local

CU-ESIIL · Sep 30, 2024 · 0487352 · 0487352
1 parent dedc509
commit 0487352
Show file tree

Hide file tree

Showing 32 changed files with 1,124,971 additions and 1 deletion.
diff --git a/.DS_Store b/.DS_Store
diff --git a/artis.Rproj b/artis.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/code/.DS_Store b/code/.DS_Store
diff --git a/code/0_prep_tsunami_elnino_data.R b/code/0_prep_tsunami_elnino_data.R
@@ -0,0 +1,125 @@
+#### goal: process + visualize other variables ####
+## el niño data (oni); tsunami; SDGs
+
+
+#### [0] prep ####
+library(tidyverse)
+
+## load data
+# external variables
+oni_og <- read_csv("data/oni.csv")
+tsunami_og <- read_tsv("data/tsunamis-2024-07-30_09-52-34_-0600.tsv")
+sdg2_og <- read_csv("data/Goal2.csv")
+sdg3_og <- read_csv("data/Goal3.csv")
+
+
+#### [1] process + plot oni ####
+oni_avg <- oni_og %>% mutate(annual_mean = rowMeans(select(., 2:13), na.rm = TRUE))
+
+p_oni <- ggplot(oni_avg,aes(Year,annual_mean)) + geom_line() +
+  scale_x_continuous(breaks = seq(1996, 2022, by = 2), limits = c(1996, 2022)) +
+  labs(title = "Oceanic Niño Index", x = "Year", y = "Annual Mean") +
+  geom_hline(yintercept = c(-0.5, 0.5), linetype = "dashed", color = c("blue","red")) +
+  theme_minimal() + theme(plot.title = element_text(hjust = 0.5))
+plot(p_oni)
+# ggsave("output/oni_plot.jpg")
+
+# write.csv(oni_avg,"output/oni_avg.csv",row.names = FALSE)
+
+
+#### [2] process + plot tsunami data ####
+# clean, make node list
+tsunami_clean <- tsunami_og %>% select(-1) %>% slice(-1)
+tsunami_nodes <- tsunami_clean %>% # grab relevant variables, convert country names to iso
+  select(Country,Year,`Maximum Water Height (m)`,
+         `Earthquake Magnitude`,Latitude,Longitude) %>%
+  arrange(Country,Year) %>% 
+  mutate(COUNTRY = countrycode::countrycode(Country, "country.name", "iso3c"),
+         COUNTRY = ifelse(Country == "KERMADEC ISLANDS", "NZL", COUNTRY))
+
+## plot n
+p_tsunami <- tsunami_nodes %>%
+  count(COUNTRY) %>%
+  ggplot(aes(x = reorder(COUNTRY, n), y = n)) +
+  geom_bar(stat = "identity") + coord_flip() +
+  labs(title = "Tsunamis per Country 2000-2022",
+       x = "Country",
+       y = "n") + theme_minimal()
+plot(p_tsunami)
+
+# plot average intensity
+average_water_height <- tsunami_nodes %>%
+  group_by(COUNTRY) %>%
+  summarise(average_max_water_height = mean(`Maximum Water Height (m)`, na.rm = TRUE))
+
+# Create a bar chart of the average maximum water height per country
+p_water_height <- ggplot(average_water_height, aes(x = reorder(COUNTRY, average_max_water_height), y = average_max_water_height)) +
+  geom_bar(stat = "identity") + coord_flip() +
+  labs(title = "Tsunami Height 2000-2022",
+       x = "Country",
+       y = "Average Maximum Water Height (m)") + theme_minimal()
+plot(p_water_height)
+
+# write.csv(tsunami_nodes,"output/tsunami_nodes.csv",row.names = FALSE)
+
+
+
+
+#### [3] process + plot SDG data ####
+## process
+nut_net <- read_csv("output/nut_pict_hs160414.csv")
+country_list <- unique(c(nut_net$importer_iso3c,nut_net$exporter_iso3c))
+
+# sdg 2
+sdg2.1 <- sdg2_og %>%
+  filter(SeriesDescription == "Prevalence of undernourishment (%)" & TimePeriod >= 2000 & TimePeriod <= 2020) %>%
+  select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
+  mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
+    Value = as.numeric(Value)) %>%
+  filter(COUNTRY %in% country_list) %>%
+  filter(!is.na(Value)) %>%
+  mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value)),
+         Unit="Percentage",SeriesDescription="Prevalence of undernourishment (percentage)") %>%
+  arrange(COUNTRY, TimePeriod) 
+
+
+# sdg 3
+sdg3.1 <- sdg3_og %>% 
+  filter(SeriesDescription == "Mortality rate attributed to cardiovascular disease, cancer, diabetes or chronic respiratory disease (probability)", 
+           TimePeriod >= 2000 & TimePeriod <= 2020) %>%
+  select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
+  mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
+    Value = as.numeric(Value)) %>%
+  filter(COUNTRY %in% country_list) %>%
+  filter(!is.na(Value)) %>%
+  mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value))) %>%
+  arrange(COUNTRY, TimePeriod)  %>% # Ensure the data is ordered by COUNTRY and TimePeriod
+  group_by(COUNTRY,TimePeriod) %>%
+  mutate(Value = mean(Value),
+         Unit="Probability",
+         SeriesDescription="Non-communicable disease mortality (probability)") %>% distinct()
+
+
+# combine all SDGs
+sdg_full <- bind_rows(sdg2.1,sdg3.1)
+
+
+
+## plot
+p_sdg <- ggplot(sdg_full,aes(TimePeriod,Value,group=COUNTRY,color=COUNTRY)) +
+  geom_path() + theme_minimal()+
+  labs(title="Sustainable Development Goal Metrics: Food Security and Health",x="Year",y=NULL)+
+  scale_x_continuous(limits=c(2000,2020),breaks=seq(2000,2020,2),expand=c(0,.1))+
+  scale_y_continuous(expand=c(0,0))+
+  theme(legend.position = "none",
+        axis.text.x = element_text(angle = 45, hjust = 1),
+        plot.title = element_text(hjust = 0.5)) + 
+  facet_wrap(~SeriesDescription,scales = "free_y")
+plot(p_sdg)
+# ggsave("output/sdg_plots.jpg")
+
+
+
+
+
+
diff --git a/code/1_merge_nutrients.R b/code/1_merge_nutrients.R
@@ -0,0 +1,68 @@
+#### goal: merge together trade + nutrient data ####
+
+#### [0] prep ####
+library(tidyverse)
+
+# load data
+artis_og <- read_csv("data/artis_request_nomura.csv") # trade flows
+afcd_og <- read_csv("data/nutrient_updated.csv") # nutrient contents per 100g; new data from Jessica Aug 1
+# afcd_og <- read_csv("data/nutrient_metadata.csv") # 
+
+#### [1] join data ####
+## join trade and nutrient data
+join_df <- left_join(artis_og,afcd_og)
+
+# clean? 
+join_df <- join_df %>% filter(dom_source!="error", 
+                              method=="capture",
+                              habitat=="marine")
+
+## convert nutrient data to nutrients per tonne - ANNUAL
+tonne_to_100g <- 1e4 # 100 grams to tonne
+
+pertonne_annual_df <- join_df %>% # nutrient per tonne
+  mutate(protein_g_pert = protein_g * tonne_to_100g, 
+         fattyacids_g_pert = fattyacids_g * tonne_to_100g,
+         calcium_mg_pert = calcium_mg * tonne_to_100g,
+         zinc_mg_pert = zinc_mg * tonne_to_100g,
+         iron_mg_pert = iron_mg * tonne_to_100g,
+         vitamina_mcg_pert = vitamina_mcg * tonne_to_100g,
+         vitaminb12_mcg_pert = vitaminb12_mcg * tonne_to_100g)
+
+converted_annual_df <- pertonne_annual_df %>% # total nutrients
+  mutate(protein_total_g = protein_g_pert * product_weight_t,
+         fattyacids_total_g = fattyacids_g_pert * product_weight_t,
+         calcium_total_mg = calcium_mg_pert * product_weight_t,
+         zinc_total_mg = zinc_mg_pert * product_weight_t,
+         iron_total_mg = iron_mg_pert * product_weight_t,
+         vitamina_total_mcg = vitamina_mcg_pert * product_weight_t,
+         vitaminb12_total_mcg = vitaminb12_mcg_pert * product_weight_t) 
+
+
+## convert nutrient data to nutrients per tonne - AGGREGATE
+pertonne_allyears_df <- join_df %>% # sum total volumes across years
+  group_by(importer_iso3c,exporter_iso3c,sciname) %>%
+  mutate(summed_weight_t = sum(product_weight_t)) %>% ungroup() %>%
+  select(-c(product_weight_t,live_weight_t,year)) %>% distinct() %>%
+  mutate(protein_g_pert = protein_g * tonne_to_100g, # convert for tonnes
+         fattyacids_g_pert = fattyacids_g * tonne_to_100g,
+         calcium_mg_pert = calcium_mg * tonne_to_100g,
+         zinc_mg_pert = zinc_mg * tonne_to_100g,
+         iron_mg_pert = iron_mg * tonne_to_100g,
+         vitamina_mcg_pert = vitamina_mcg * tonne_to_100g,
+         vitaminb12_mcg_pert = vitaminb12_mcg * tonne_to_100g)
+
+converted_allyears_df <- pertonne_allyears_df %>% # convert to total nutrients 
+  mutate(protein_total_g = protein_g_pert * summed_weight_t,
+         fattyacids_total_g = fattyacids_g_pert * summed_weight_t,
+         calcium_total_mg = calcium_mg_pert * summed_weight_t,
+         zinc_total_mg = zinc_mg_pert * summed_weight_t,
+         iron_total_mg = iron_mg_pert * summed_weight_t,
+         vitamina_total_mcg = vitamina_mcg_pert * summed_weight_t,
+         vitaminb12_total_mcg = vitaminb12_mcg_pert * summed_weight_t) 
+
+
+#### save output ####
+# write.csv(converted_annual_df,"output/nutrient_flows_annual_df.csv",row.names = FALSE)
+# write.csv(converted_allyears_df,"output/nutrient_flows_allyears_df.csv",row.names=FALSE)
+
diff --git a/code/2_explore_nutrientflows.R b/code/2_explore_nutrientflows.R
@@ -0,0 +1,92 @@
+#### goal: explore nutrient flows / ID major import species for PICTs ####
+# previous: 1_merge_nutrients.R
+
+
+#### [0] prep ####
+library(tidyverse)
+library(vegan) # for shannon's diversity index
+
+nutrient_flows_allyears_og <- read_csv("output/nutrient_flows_allyears_df.csv")
+nutrient_flows_annual_og <- read_csv("output/nutrient_flows_annual_df.csv")
+
+
+#### [1] filter for PICTs and hs codes ####
+picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")
+hs_codes <- 160414
+
+# filter PICTs in importers or exporters (could also include source data) + hs codes
+nut_pict_hs <- nutrient_flows_annual_og %>% 
+  filter(importer_iso3c %in% picts |
+           exporter_iso3c %in% picts) %>% 
+  filter(hs6==hs_codes)
+
+# aggregate - sum total for each nutrient
+nut_pict_hs_sum <- nut_pict_hs %>% group_by(importer_iso3c,exporter_iso3c,year) %>%
+  summarise(total_weight_t = sum(product_weight_t),
+            protein_sum=sum(protein_total_g),
+            fattyacids_sum=sum(fattyacids_total_g),
+            calcium_sum=sum(calcium_total_mg),
+            zinc_sum=sum(zinc_total_mg),
+            iron_sum=sum(iron_total_mg),
+            vita_sum=sum(vitamina_total_mcg),
+            vitb12_sum=sum(vitaminb12_total_mcg))
+
+#### [2] calculate diversity metrics by normalized dietary contributions ####
+## get dietary recommendations
+## values from nih https://ods.od.nih.gov/HealthInformation/nutrientrecommendations.aspx
+## values for fatty acids averaged for male adults over 4 years old from nih https://ods.od.nih.gov/factsheets/Omega3FattyAcids-HealthProfessional/#h2
+dietrec_df <- data_frame(category=c("protein","fattyacids","calcium",
+                                    "zinc","iron","vita","vitb12"),
+                         rec_intake_adults=c(50,1.4,1300,
+                                             11,18,900,2.4),
+                         unit=c("g","g","mg",
+                                "mg","mg","mcg","mcg"))
+# write.csv(dietrec_df,"output/dietrec_df.csv",row.names = FALSE)
+
+## normalize and calculate diversity
+# extract nutrient columns ending with "_sum"
+nutrient_columns <- grep("_sum$", names(nut_pict_hs_sum), value = TRUE)
+
+# create dictionary for quick lookup of recommended intake values
+rec_intake_dict <- setNames(dietrec_df$rec_intake_adults, dietrec_df$category)
+
+# normalize each nutrient column
+nut_pict_hs_div <- nut_pict_hs_sum
+
+for (col in nutrient_columns) {
+  nutrient <- sub("_sum$", "", col)
+  if (nutrient %in% names(rec_intake_dict)) {
+    recommended_intake <- rec_intake_dict[[nutrient]]
+    new_col_name <- paste0(nutrient, "_norm")
+    nut_pict_hs_div[[new_col_name]] <- nut_pict_hs_div[[col]] / recommended_intake
+  }
+}
+
+# calculate diversity
+normalized_columns <- grep("_norm$", names(nut_pict_hs_div), value = TRUE)
+
+nut_pict_hs_div$shannon_diversity <- apply(nut_pict_hs_div[normalized_columns], 1,
+                                           function(x) diversity(x, index = "shannon"))
+nut_pict_hs_div$norm_sum <- rowSums(nut_pict_hs_div[normalized_columns])
+
+
+## aggregate year version - find overall top volumes
+# nut_pict_all <- nutrient_flows_allyears_og %>% 
+#   filter(importer_iso3c %in% picts |
+#            exporter_iso3c %in% picts) 
+# nut_pict_all_top <- nut_pict_all %>%
+#   select(-c(importer_iso3c,exporter_iso3c,source_country_iso3c)) %>%
+#   group_by(sciname) %>% #summarise(total_weight=sum(summed_weight_t)) %>%
+#   arrange(desc(total_weight)) %>%
+#   slice_head(n=10)
+
+#### save output ####
+# PICT names
+# write.csv(picts,"output/pict_names.csv",row.names = FALSE) 
+# top species
+# write.csv(nut_pict_all_top,"output/top_sciname.csv",row.names = FALSE) 
+
+# pruned annual data at hs level 160414
+# write.csv(nut_pict_hs_div,"output/nut_pict_hs160414.csv",row.names = FALSE) 
+
+
diff --git a/code/4_plotsankey.R b/code/4_plotsankey.R
@@ -0,0 +1,85 @@
+#### goal: make a sankey diagram with nutrient categories flowing pics <-> non-pics ####
+
+#### [0] prep ####
+library(tidyverse)
+# library(networkD3)
+library(ggalluvial)
+
+
+nut_net_og <- read_csv("output/nut_pict_hs160414.csv")
+
+
+
+
+#### [1] process - ggalluvial version #####
+picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")
+
+# Add PICS/non-PICS IDs
+net_class <- nut_net_og %>% 
+  mutate(
+    ImporterClass = ifelse(importer_iso3c %in% picts, "PICs", "Non-\nPICs"),
+    ExporterClass = ifelse(exporter_iso3c %in% picts, "PICs", "Non-\nPICs")
+  )
+
+
+# List of nutrient columns
+nutrients <- c("protein_sum", "zinc_sum", "fattyacids_sum",
+               "iron_sum", "vita_sum","vitb12_sum","calcium_sum")
+
+# Aggregate data across all years for each nutrient
+net_agg <- net_class %>%
+  filter(!(ExporterClass == "PICs" & ImporterClass == "PICs")) %>%
+  group_by(ExporterClass, ImporterClass) %>%
+  summarise(across(all_of(nutrients), sum, na.rm = TRUE)) %>%
+  pivot_longer(cols = all_of(nutrients), names_to = "Nutrient", values_to = "Value") %>%
+  ungroup() %>%
+  mutate(Nutrient = str_replace(Nutrient, "_sum$", "")) %>%
+  mutate(Nutrient = str_to_title(Nutrient))
+
+alluvial_data <- net_agg %>%
+  mutate(Flow = paste(ExporterClass, "to", ImporterClass)) %>%
+  group_by(Flow, Nutrient) %>%
+  summarise(Value = sum(Value)) %>%
+  separate(Flow, into = c("ExporterClass", "ImporterClass"), sep = " to ") %>%
+  ungroup() %>%  group_by(ExporterClass,ImporterClass)%>%
+  mutate(Proportion = Value / sum(Value)) %>%
+  mutate(Nutrient = factor(Nutrient, levels = unique(Nutrient)),
+         ExporterClass = factor(ExporterClass, levels = unique(ExporterClass)),
+         ImporterClass = factor(ImporterClass, levels = unique(ImporterClass))) %>%
+  droplevels()
+
+# Create labels for proportions
+alluvial_data <- alluvial_data %>%
+  mutate(Label = paste0(round(Proportion * 100, 1), "%"))
+
+exporter_labels <- alluvial_data %>%
+  group_by(ExporterClass) %>%
+  mutate(mid_y = cumsum(Value) + 1 * Value)
+
+
+# Create the plot
+ggplot(alluvial_data,
+       aes(axis1 = ExporterClass, axis2 = ImporterClass, y = Value, fill = Nutrient)) +
+  geom_alluvium(width = 1/12, knot.pos = 0.5) +
+  geom_stratum(aes(color = ExporterClass), fill="grey95",width = 1/12, show.legend = FALSE) +
+  # geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3, vjust = -0.5) +
+  geom_text(aes(label = Label,x=1.2), size = 5, color = "black", position = position_stack(vjust = .3),check_overlap = TRUE) +
+  scale_x_discrete(limits = c("ExporterClass", "ImporterClass"), expand = c(0.1, 0.1),
+                   labels = c("ExporterClass" = "Exporter", "ImporterClass" = "Importer")) +
+  labs(title = "Nutrient Flows Between PICs and Non-PICs",x=NULL,
+       y = NULL, fill = "Nutrient") + 
+  theme_minimal() +
+  theme(axis.text.y = element_blank(),
+        axis.ticks = element_blank(),
+        panel.grid = element_blank(),
+        legend.title = element_text(size=15),
+        legend.text = element_text(size=13),
+        axis.text.x = element_text(size = 15),
+        plot.title = element_text(hjust = 0.5,size=17)) +
+  guides(fill = guide_legend(title = "Nutrient"),color="none")
+
+# ggsave("output/sankey_nutrients.jpg")
+#
+
+
+