big update; rearranged + network analysis progress
cake-oh committed Oct 31, 2024
1 parent b20d1b1 commit 9b15e24
Showing 61 changed files with 888,153 additions and 247,440 deletions.
Binary file modified .DS_Store
Binary file not shown.
171 changes: 171 additions & 0 deletions code/10_temporalnetworks.R
@@ -0,0 +1,171 @@
#### goal: use edge lists to calculate annual network metrics ####
## 'regular networks' and multilayer networks

#### [0] prep ####
library(tidyverse)
library(tidytext)
library(igraph)
library(ggraph)

edge_df_pics <- read_csv("output/edge_consumption_pics_nutrients_annual.csv")

#### [1] prep data ####
nut_df <- edge_df_pics %>% filter(Nutrient == "Protein") %>%
select(SourceContinent,ExporterContinent,ConsumerContinent,year,Nutrient,rni_amount) %>%
mutate(ExporterContinent=str_replace(ExporterContinent,"Domestic PICs","PICs"))


#### [2] regular network analysis ####
## build annual networks and calculate metrics (looped below; a reusable helper sketch follows the loop)
network_metrics <- data.frame()
node_metrics <- data.frame()

# Loop through each year in the dataset
for (yr in unique(nut_df$year)) {

#### filter data for the specific year ####
annual_data <- nut_df %>% filter(year == yr)

#### create edges from the filtered data ####
edges <- rbind(
data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
)
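# note: repeated from-to pairs become parallel edges in the graph below;
# an optional pre-aggregation (sketch only, not applied here) would sum the weights per pair:
# edges <- edges %>% group_by(from, to) %>% summarise(weight = sum(weight), .groups = "drop")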

#### create a directed graph with weighted edges for this year ####
g <- graph_from_data_frame(edges, directed = TRUE)

#### calculate network-wide metrics ####
degree_in <- mean(degree(g, mode = "in")) # Mean in-degree
degree_out <- mean(degree(g, mode = "out")) # Mean out-degree
betweenness <- mean(betweenness(g, directed = TRUE)) # Mean betweenness
closeness <- mean(closeness(g, mode = "all")) # Mean closeness
density <- edge_density(g) # Network density
avg_path_length <- mean_distance(g, directed = TRUE) # Average path length
modularity_value <- modularity(cluster_fast_greedy(as.undirected(g))) # Modularity (fast-greedy communities)

# Store network-wide metrics in the dataframe
network_metrics <- rbind(network_metrics, data.frame(
Year = yr,
degree_in = degree_in,
degree_out = degree_out,
betweenness = betweenness,
closeness = closeness,
density = density,
avg_path_length = avg_path_length,
modularity = modularity_value
))

#### calculate node-specific metrics ####
node_degree_in <- degree(g, mode = "in") # Node in-degree
node_degree_out <- degree(g, mode = "out") # Node out-degree
node_betweenness <- betweenness(g, directed = TRUE) # Node betweenness
node_closeness <- closeness(g, mode = "all") # Node closeness
node_strength_in <- strength(g, mode = "in", weights = E(g)$weight) # In-strength
node_strength_out <- strength(g, mode = "out", weights = E(g)$weight) # Out-strength
node_strength_diff <- node_strength_in - node_strength_out

# Store node-specific metrics in the dataframe
node_metrics <- rbind(node_metrics, data.frame(
Year = yr,
Node = V(g)$name,
degree_in = node_degree_in,
degree_out = node_degree_out,
betweenness = node_betweenness,
closeness = node_closeness,
strength_in = node_strength_in,
strength_out = node_strength_out,
strength_diff = node_strength_diff
))
}
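
## sketch of the helper mentioned above (an assumption, not part of the original workflow):
## a hypothetical build_annual_graph() that both the regular and multilayer loops could reuse
build_annual_graph <- function(df, yr) {
  annual_data <- df %>% filter(year == yr)
  edges <- rbind(
    data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
    data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
  )
  graph_from_data_frame(edges, directed = TRUE)
}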

#### save outputs ####
# write.csv(network_metrics,"output/networkmetrics_annual_pics.csv",row.names = FALSE)
# write.csv(node_metrics,"output/nodemetrics_annual_pics.csv",row.names = FALSE)

#### explore network metric results ####
## initially explored: avg_path_length and modularity (their similar trends make sense since the two are correlated)
# ggplot(network_metrics,aes(Year,modularity)) + geom_line()
# summary(lm(modularity ~ Year, data = network_metrics))
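# quick check of that correlation (sketch; assumes network_metrics is still in memory)
# cor(network_metrics$avg_path_length, network_metrics$modularity, use = "complete.obs")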

#### explore node metric results ####
ggplot(node_metrics,aes(Year,strength_diff,colour = Node,group=Node)) + geom_line() +
ggtitle(paste0(unique(nut_df$Nutrient)))




#### [3] multilayer network analysis ####
network_metrics_ml <- data.frame()
node_metrics_ml <- data.frame()

# Loop through each year in the dataset
for (yr in unique(nut_df$year)) {

#### filter data for the specific year ####
annual_data <- nut_df %>% filter(year == yr)

#### append explicit role names ####
annual_data$SourceContinent <- paste0(annual_data$SourceContinent, "_Source")
annual_data$ExporterContinent <- paste0(annual_data$ExporterContinent, "_Exporter")
annual_data$ConsumerContinent <- paste0(annual_data$ConsumerContinent, "_Consumer")
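# e.g. "Asia" becomes "Asia_Source", "Asia_Exporter", or "Asia_Consumer",
# so each role forms its own layer of nodes in the combined graph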

#### create edges for Source -> Exporter and Exporter -> Consumer ####
edges <- rbind(
data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
)

#### create a directed graph with weighted edges for this year ####
g <- graph_from_data_frame(edges, directed = TRUE)

#### calculate network-wide metrics ####
degree_in <- mean(degree(g, mode = "in")) # Mean in-degree
degree_out <- mean(degree(g, mode = "out")) # Mean out-degree
betweenness <- mean(betweenness(g, directed = TRUE)) # Mean betweenness
closeness <- mean(closeness(g, mode = "all")) # Mean closeness
density <- edge_density(g) # Network density
avg_path_length <- mean_distance(g, directed = TRUE) # Average path length
modularity_value <- modularity(cluster_fast_greedy(as.undirected(g))) # Modularity (fast-greedy communities)

# store network-wide metrics in the dataframe
network_metrics_ml <- rbind(network_metrics_ml, data.frame(
Year = yr,
degree_in = degree_in,
degree_out = degree_out,
betweenness = betweenness,
closeness = closeness,
density = density,
avg_path_length = avg_path_length,
modularity = modularity_value
))

#### calculate node-specific metrics ####
node_degree_in <- degree(g, mode = "in") # Node in-degree
node_degree_out <- degree(g, mode = "out") # Node out-degree
node_betweenness <- betweenness(g, directed = TRUE) # Node betweenness
node_closeness <- closeness(g, mode = "all") # Node closeness
node_strength_in <- strength(g, mode = "in", weights = E(g)$weight) # In-strength
node_strength_out <- strength(g, mode = "out", weights = E(g)$weight) # Out-strength
node_strength_diff <- node_strength_in - node_strength_out # Strength difference

# store node-specific metrics in the dataframe
node_metrics_ml <- rbind(node_metrics_ml, data.frame(
Year = yr,
Node = V(g)$name,
degree_in = node_degree_in,
degree_out = node_degree_out,
betweenness = node_betweenness,
closeness = node_closeness,
strength_in = node_strength_in,
strength_out = node_strength_out,
strength_diff = node_strength_diff
))
}



#### save outputs ####
# write.csv(network_metrics_ml,"output/networkmetrics_annual_pics_ml.csv",row.names = FALSE)
# write.csv(node_metrics_ml,"output/nodemetrics_annual_pics_ml.csv",row.names = FALSE)

163 changes: 163 additions & 0 deletions code/11_temporalanalysis.R
@@ -0,0 +1,163 @@
#### goal: assess relationships between network metrics and relevant variables ####

#### [0] prep ####
library(tidyverse)
library(countrycode)

# load network data
net_met_df <- read_csv("output/networkmetrics_annual_pics.csv")
node_met_df <- read_csv("output/nodemetrics_annual_pics.csv")

# create variables
country_consumption <- read_csv("output/edge_consumption_countries_nutrients.csv")
country_list <- unique(c(country_consumption$SourceCountry,country_consumption$ExporterCountry,country_consumption$ConsumerCountry))
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")

# load variable data - ONI, subsidies, SDGs
oni_og <- read_csv("data/oni.csv")
subsidies_og <- read_csv("data/fish_subsidies/Sumaila_dataset.csv")
sdg2_og <- read_csv("data/Goal2.csv")
sdg3_og <- read_csv("data/Goal3.csv")
treaties_og <- readxl::read_xlsx("data/fapda_details-202410301647.xlsx") # from fao https://fapda.apps.fao.org/fapda/#main.html
psma_og <- read_csv("data/psma_parties.csv") # from fao https://www.fao.org/port-state-measures/background/parties-psma/en/


#### [1] process external variables ####
## align timesteps and calculate ONI summary stats
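# DJF:NDJ are the twelve overlapping 3-month ONI seasons, so the row-wise stats below summarise each calendar year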
oni_annual <- oni_og %>%
rowwise() %>%
mutate(Mean_oni = mean(c_across(DJF:NDJ), na.rm = TRUE),
Var_oni = var(c_across(DJF:NDJ), na.rm=TRUE),
Median_oni = median(c_across(DJF:NDJ), na.rm=TRUE),
Min_oni = min(c_across(DJF:NDJ), na.rm = TRUE),
Max_oni = max(c_across(DJF:NDJ), na.rm = TRUE),
Range_oni = Max_oni - Min_oni) %>%
filter(Year >= min(net_met_df$Year) & Year <= max(net_met_df$Year)) %>% select(-c(DJF:NDJ))

## process SDGs
# sdg 2
sdg2.1 <- sdg2_og %>%
filter(SeriesDescription == "Prevalence of undernourishment (%)" & TimePeriod >= 2000 & TimePeriod <= 2020) %>%
select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
Value = as.numeric(Value)) %>%
filter(COUNTRY %in% country_list) %>%
filter(!is.na(Value)) %>%
mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value)),
Unit="Percentage",SeriesDescription="Prevalence of undernourishment (percentage)") %>% ungroup() %>%
group_by(COUNTRY,TimePeriod) %>%
mutate(Continent = ifelse(COUNTRY %in% picts, "PICs", countrycode(COUNTRY, "iso3c", "continent"))) %>% ungroup() %>%
arrange(COUNTRY, TimePeriod) %>%
group_by(Continent) %>%
mutate(Value_continent=mean(Value)) %>% ungroup() %>%
group_by(Continent,TimePeriod) %>%
mutate(Value_continent_annual_sdg2=mean(Value)) %>% ungroup() %>%
rename(Year=TimePeriod,Node=Continent) %>%
select(Node,Year,Value_continent_annual_sdg2)

# sdg 3
sdg3.1 <- sdg3_og %>%
filter(SeriesDescription == "Mortality rate attributed to cardiovascular disease, cancer, diabetes or chronic respiratory disease (probability)",
TimePeriod >= 2000 & TimePeriod <= 2020) %>%
select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
Value = as.numeric(Value)) %>%
filter(COUNTRY %in% country_list) %>%
filter(!is.na(Value)) %>%
mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value))) %>%
arrange(COUNTRY, TimePeriod) %>% # Ensure the data is ordered by COUNTRY and TimePeriod
group_by(COUNTRY,TimePeriod) %>%
mutate(Value = mean(Value),
Unit="Probability",
SeriesDescription="Non-communicable disease mortality (probability)") %>% distinct() %>%
mutate(Continent = ifelse(COUNTRY %in% picts, "PICs", countrycode(COUNTRY, "iso3c", "continent"))) %>% ungroup() %>%
arrange(COUNTRY, TimePeriod) %>%
group_by(Continent) %>%
mutate(Value_continent=mean(Value)) %>% ungroup() %>%
group_by(Continent,TimePeriod) %>%
mutate(Value_continent_annual_sdg3=mean(Value)) %>% ungroup() %>%
rename(Year=TimePeriod,Node=Continent) %>%
select(Node,Year,Value_continent_annual_sdg3) %>% distinct()

# combine all SDGs
sdg_full <- left_join(sdg2.1,sdg3.1,relationship = "many-to-many") %>% distinct()





#### combine all data ####
merged_df_net <- left_join(net_met_df,oni_annual,by="Year")
merged_df_node <- left_join(node_met_df,oni_annual,by="Year") %>%
left_join(sdg_full,by=c("Year","Node"))


#### [3] analysis time - network-wide metrics vs. other variables ####
## exploratory plots
ggplot(merged_df_net,aes(Year,Mean_oni)) + geom_line()
ggplot(merged_df_net,aes(Year,modularity)) + geom_line()


## correlations
cor(merged_df_net$modularity, merged_df_net$Var_oni, use = "complete.obs")

## linear regression
model <- lm(modularity ~ Mean_oni, data = merged_df_net)
summary(model)

## multiple linear regressions
model <- lm(modularity ~ Mean_oni + Max_oni + Var_oni, data = merged_df_net)
summary(model)



#### [4] analysis time - node-specific metrics vs. other variables ####
## correlation
cor(merged_df_node$strength_diff,merged_df_node$Value_continent_annual_sdg2,use="complete.obs")


## mixed effects (strength_diff)
library(lme4)
model_mixed <- lmer(strength_diff ~ Mean_oni + (1 | Node) + (1 | Year), data = merged_df_node)
summary(model_mixed)

## fixed effects
library(plm)
model_fixed <- plm(strength_diff ~ Mean_oni, data = merged_df_node, index = c("Node", "Year"), model = "within")
summary(model_fixed)

## random effects
model_random <- plm(strength_diff ~ Value_continent_annual_sdg2,
data = merged_df_node,
index = c("Node", "Year"), model = "random")
summary(model_random)
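## possible follow-up (sketch, not run here): a Hausman test comparing fixed vs. random effects
## requires the two models to share a formula, e.g. refit the random-effects model on Mean_oni
# model_random_oni <- plm(strength_diff ~ Mean_oni, data = merged_df_node,
#                         index = c("Node", "Year"), model = "random")
# phtest(model_fixed, model_random_oni)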

## GAMs
library(mgcv)
model_gam <- gam(Value_continent_annual_sdg2 ~ s(strength_diff),
data = merged_df_node)
summary(model_gam)
plot(model_gam)
# gam w/ nodes and year
model_gam2 <- gam(Value_continent_annual_sdg2 ~ s(strength_diff) + s(Year) + Node,
data = merged_df_node)
summary(model_gam2)
plot(model_gam2)

model_gam3 <- gam(Value_continent_annual_sdg2 ~ s(strength_diff,Year) + s(Year) + Node,
data = merged_df_node)
summary(model_gam3)
plot(model_gam3)

# Plot the smooth term for strength_diff and Year interaction
plot(model_gam3, select = 1, rug = TRUE, shade = TRUE, main = "Interaction: strength_diff and Year")

# Plot the smooth term for Year
plot(model_gam3, select = 2, rug = TRUE, shade = TRUE, main = "Smooth Effect: Year")

# Plot the Node term (a parametric factor, so it only appears with all.terms = TRUE)
plot(model_gam3, all.terms = TRUE, rug = TRUE, shade = TRUE)




63 changes: 63 additions & 0 deletions code/1a_grab_consumption.R
@@ -0,0 +1,63 @@
#### [0] prep ####
library(tidyverse)
library(countrycode)

# define the PICTs to filter for
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")

#### [1] pull consumption data ####
# define the path to your data folder and the file pattern to match
data_path <- "data/consumption/"
file_pattern <- "consumption_midpoint_HS07_\\d{4}\\.csv"
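# e.g. matches files like "consumption_midpoint_HS07_2015.csv" (any four-digit year)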

# list all files in the folder that match the pattern for all years
file_list <- list.files(path = data_path, pattern = file_pattern, full.names = TRUE)


# define a function to read and filter each file
load_and_filter <- function(file) {
read_csv(file) %>%
filter(
# consumer_iso3c %in% picts |
# exporter_iso3c %in% picts |
# source_country_iso3c %in% picts,
habitat == "marine",
method == "capture"
) %>%
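# the *_short columns below fall back to ISO3 codes for country names longer than 6 characters (presumably to keep labels compact)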
mutate(
source_country_short = if_else(
str_length(countrycode(source_country_iso3c, "iso3c", "country.name")) > 6,
source_country_iso3c,
countrycode(source_country_iso3c, "iso3c", "country.name")
),
exporter_country_short = if_else(
str_length(countrycode(exporter_iso3c, "iso3c", "country.name")) > 6,
exporter_iso3c,
countrycode(exporter_iso3c, "iso3c", "country.name")
),
consumer_country_short = if_else(
str_length(countrycode(consumer_iso3c, "iso3c", "country.name")) > 6,
consumer_iso3c,
countrycode(consumer_iso3c, "iso3c", "country.name")
),
source_country = countrycode(source_country_iso3c, "iso3c", "country.name"),
exporter_country = countrycode(exporter_iso3c, "iso3c", "country.name"),
consumer_country = countrycode(consumer_iso3c, "iso3c", "country.name")
)
}

# combine into a single dataframe
combined_data <- bind_rows(lapply(file_list, load_and_filter))

## save it - annual
# write.csv(combined_data,paste0("output/consumption_annual_",unique(combined_data$hs_version), ".csv"),row.names = FALSE)

## save it - years aggregated
consumption_allyears <- group_by(combined_data,
source_country_iso3c,exporter_iso3c,consumer_iso3c) %>%
summarise(total_weight = sum(consumption_live_t))
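# note: summarise() drops only the last grouping level by default; add .groups = "drop" for a fully ungrouped result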
# write.csv(consumption_allyears,paste0("output/consumption_allyears_",unique(combined_data$hs_version),".csv"),row.names = FALSE)

