generated from CU-ESIIL/Postdoc_OASIS
big update; rearranged + network analysis progress
Showing 61 changed files with 888,153 additions and 247,440 deletions.
@@ -0,0 +1,171 @@
#### goal: use edge lists to calculate annual network metrics ####
## 'regular networks' and multilayer networks

#### [0] prep ####
library(tidyverse)
library(tidytext)
library(igraph)
library(ggraph)

edge_df_pics <- read_csv("output/edge_consumption_pics_nutrients_annual.csv")

#### [1] prep data ####
nut_df <- edge_df_pics %>%
  filter(Nutrient == "Protein") %>%
  select(SourceContinent, ExporterContinent, ConsumerContinent, year, Nutrient, rni_amount) %>%
  mutate(ExporterContinent = str_replace(ExporterContinent, "Domestic PICs", "PICs"))
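## optional sanity check (a sketch, not part of the original workflow): confirm the edge table
## has one row per source-exporter-consumer-year combination before looping over years
# nut_df %>%
#   count(SourceContinent, ExporterContinent, ConsumerContinent, year) %>%
#   filter(n > 1)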
#### [2] regular network analysis ####
## build a network per year, then calculate metrics
network_metrics <- data.frame()
node_metrics <- data.frame()

# Loop through each year in the dataset
for (yr in unique(nut_df$year)) {

  #### filter data for the specific year ####
  annual_data <- nut_df %>% filter(year == yr)

  #### create edges from the filtered data ####
  edges <- rbind(
    data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
    data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
  )

  #### create a directed graph with weighted edges for this year ####
  g <- graph_from_data_frame(edges, directed = TRUE)

  #### calculate network-wide metrics ####
  degree_in <- mean(degree(g, mode = "in"))          # mean in-degree centrality
  degree_out <- mean(degree(g, mode = "out"))        # mean out-degree centrality
  betweenness <- mean(betweenness(g, directed = TRUE))  # mean betweenness centrality
  closeness <- mean(closeness(g, mode = "all"))      # mean closeness centrality
  density <- edge_density(g)                         # network density
  avg_path_length <- mean_distance(g, directed = TRUE)  # average path length
  modularity_value <- modularity(cluster_fast_greedy(as.undirected(g)))  # modularity
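  ## note (optional sketch, not in the original): the summaries above ignore edge weights;
  ## weighted analogues could use the rni_amount weights directly, e.g.
  # strength_mean <- mean(strength(g, mode = "all", weights = E(g)$weight))
  # betweenness_wtd <- mean(betweenness(g, directed = TRUE, weights = 1 / E(g)$weight))  # igraph treats weights as distances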

  # Store network-wide metrics in the dataframe
  network_metrics <- rbind(network_metrics, data.frame(
    Year = yr,
    degree_in = degree_in,
    degree_out = degree_out,
    betweenness = betweenness,
    closeness = closeness,
    density = density,
    avg_path_length = avg_path_length,
    modularity = modularity_value
  ))

  #### calculate node-specific metrics ####
  node_degree_in <- degree(g, mode = "in")       # Node in-degree
  node_degree_out <- degree(g, mode = "out")     # Node out-degree
  node_betweenness <- betweenness(g, directed = TRUE)  # Node betweenness
  node_closeness <- closeness(g, mode = "all")   # Node closeness
  node_strength_in <- strength(g, mode = "in", weights = E(g)$weight)    # In-strength
  node_strength_out <- strength(g, mode = "out", weights = E(g)$weight)  # Out-strength
  node_strength_diff <- node_strength_in - node_strength_out             # Strength difference

  # Store node-specific metrics in the dataframe
  node_metrics <- rbind(node_metrics, data.frame(
    Year = yr,
    Node = V(g)$name,
    degree_in = node_degree_in,
    degree_out = node_degree_out,
    betweenness = node_betweenness,
    closeness = node_closeness,
    strength_in = node_strength_in,
    strength_out = node_strength_out,
    strength_diff = node_strength_diff
  ))
}

#### save outputs ####
# write.csv(network_metrics, "output/networkmetrics_annual_pics.csv", row.names = FALSE)
# write.csv(node_metrics, "output/nodemetrics_annual_pics.csv", row.names = FALSE)

#### explore network metric results ####
## initially explored: avg_path_length, modularity (makes sense since they are correlated)
# ggplot(network_metrics, aes(Year, modularity)) + geom_line()
# summary(lm(modularity ~ Year, data = network_metrics))

#### explore node metric results ####
ggplot(node_metrics, aes(Year, strength_diff, colour = Node, group = Node)) +
  geom_line() +
  ggtitle(paste0(unique(nut_df$Nutrient)))
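## optional network plot (a sketch, not in the original script): ggraph is loaded above but unused;
## this draws the weighted network for one example year using the same edge construction as the loop
# yr_plot <- max(nut_df$year)
# annual_plot <- nut_df %>% filter(year == yr_plot)
# edges_plot <- rbind(
#   data.frame(from = annual_plot$SourceContinent, to = annual_plot$ExporterContinent, weight = annual_plot$rni_amount),
#   data.frame(from = annual_plot$ExporterContinent, to = annual_plot$ConsumerContinent, weight = annual_plot$rni_amount)
# )
# g_plot <- graph_from_data_frame(edges_plot, directed = TRUE)
# ggraph(g_plot, layout = "circle") +
#   geom_edge_link(aes(width = weight), alpha = 0.4) +
#   geom_node_label(aes(label = name)) +
#   theme_void()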

#### [3] multilayer network analysis ####
network_metrics_ml <- data.frame()
node_metrics_ml <- data.frame()

# Loop through each year in the dataset
for (yr in unique(nut_df$year)) {

  #### filter data for the specific year ####
  annual_data <- nut_df %>% filter(year == yr)

  #### append explicit role names ####
  annual_data$SourceContinent <- paste0(annual_data$SourceContinent, "_Source")
  annual_data$ExporterContinent <- paste0(annual_data$ExporterContinent, "_Exporter")
  annual_data$ConsumerContinent <- paste0(annual_data$ConsumerContinent, "_Consumer")

  #### create edges for Source -> Exporter and Exporter -> Consumer ####
  edges <- rbind(
    data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
    data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
  )

  #### create a directed graph with weighted edges for this year ####
  g <- graph_from_data_frame(edges, directed = TRUE)
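  ## note: with the role suffixes each continent can appear as up to three separate nodes
  ## (Source, Exporter, Consumer), which approximates the layers of a multilayer network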

  #### calculate network-wide metrics ####
  degree_in <- mean(degree(g, mode = "in"))          # mean in-degree centrality
  degree_out <- mean(degree(g, mode = "out"))        # mean out-degree centrality
  betweenness <- mean(betweenness(g, directed = TRUE))  # mean betweenness centrality
  closeness <- mean(closeness(g, mode = "all"))      # mean closeness centrality
  density <- edge_density(g)                         # network density
  avg_path_length <- mean_distance(g, directed = TRUE)  # average path length
  modularity_value <- modularity(cluster_fast_greedy(as.undirected(g)))  # modularity

  # store network-wide metrics in the dataframe
  network_metrics_ml <- rbind(network_metrics_ml, data.frame(
    Year = yr,
    degree_in = degree_in,
    degree_out = degree_out,
    betweenness = betweenness,
    closeness = closeness,
    density = density,
    avg_path_length = avg_path_length,
    modularity = modularity_value
  ))

  #### calculate node-specific metrics ####
  node_degree_in <- degree(g, mode = "in")       # Node in-degree
  node_degree_out <- degree(g, mode = "out")     # Node out-degree
  node_betweenness <- betweenness(g, directed = TRUE)  # Node betweenness
  node_closeness <- closeness(g, mode = "all")   # Node closeness
  node_strength_in <- strength(g, mode = "in", weights = E(g)$weight)    # In-strength
  node_strength_out <- strength(g, mode = "out", weights = E(g)$weight)  # Out-strength
  node_strength_diff <- node_strength_in - node_strength_out             # Strength difference

  # store node-specific metrics in the dataframe
  node_metrics_ml <- rbind(node_metrics_ml, data.frame(
    Year = yr,
    Node = V(g)$name,
    degree_in = node_degree_in,
    degree_out = node_degree_out,
    betweenness = node_betweenness,
    closeness = node_closeness,
    strength_in = node_strength_in,
    strength_out = node_strength_out,
    strength_diff = node_strength_diff
  ))
}

#### save outputs ####
# write.csv(network_metrics_ml, "output/networkmetrics_annual_pics_ml.csv", row.names = FALSE)
# write.csv(node_metrics_ml, "output/nodemetrics_annual_pics_ml.csv", row.names = FALSE)
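## optional follow-up (sketch, assumes node names keep the "_Role" suffix): split the role back
## out to compare source / exporter / consumer strength balance within each continent
# node_metrics_ml %>%
#   separate(Node, into = c("Continent", "Role"), sep = "_") %>%
#   ggplot(aes(Year, strength_diff, colour = Role, group = Role)) +
#   geom_line() +
#   facet_wrap(~ Continent)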
@@ -0,0 +1,163 @@
#### goal: assess relationships between network metrics and relevant variables ####

#### [0] prep ####
library(tidyverse)
library(countrycode)

# load network data
net_met_df <- read_csv("output/networkmetrics_annual_pics.csv")
node_met_df <- read_csv("output/nodemetrics_annual_pics.csv")

# create variables
country_consumption <- read_csv("output/edge_consumption_countries_nutrients.csv")
country_list <- unique(c(country_consumption$SourceCountry, country_consumption$ExporterCountry, country_consumption$ConsumerCountry))
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")
# load variable data - oni, subsidies, sdg's
oni_og <- read_csv("data/oni.csv")
subsidies_og <- read_csv("data/fish_subsidies/Sumaila_dataset.csv")
sdg2_og <- read_csv("data/Goal2.csv")
sdg3_og <- read_csv("data/Goal3.csv")
treaties_og <- readxl::read_xlsx("data/fapda_details-202410301647.xlsx") # from fao https://fapda.apps.fao.org/fapda/#main.html
psma_og <- read_csv("data/psma_parties.csv") # from fao https://www.fao.org/port-state-measures/background/parties-psma/en/

#### [1] process external variables ####
## align timesteps and calculate oni summary stats
oni_annual <- oni_og %>%
  rowwise() %>%
  mutate(Mean_oni = mean(c_across(DJF:NDJ), na.rm = TRUE),
         Var_oni = var(c_across(DJF:NDJ), na.rm = TRUE),
         Median_oni = median(c_across(DJF:NDJ), na.rm = TRUE),
         Min_oni = min(c_across(DJF:NDJ), na.rm = TRUE),
         Max_oni = max(c_across(DJF:NDJ), na.rm = TRUE),
         Range_oni = Max_oni - Min_oni) %>%
  ungroup() %>%
  filter(Year >= min(net_met_df$Year) & Year <= max(net_met_df$Year)) %>%
  select(-c(DJF:NDJ))

## process SDG's
# sdg 2
sdg2.1 <- sdg2_og %>%
  filter(SeriesDescription == "Prevalence of undernourishment (%)" & TimePeriod >= 2000 & TimePeriod <= 2020) %>%
  select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
  mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
         Value = as.numeric(Value)) %>%
  filter(COUNTRY %in% country_list) %>%
  filter(!is.na(Value)) %>%
  mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value)),
         Unit = "Percentage",
         SeriesDescription = "Prevalence of undernourishment (percentage)") %>%
  ungroup() %>%
  group_by(COUNTRY, TimePeriod) %>%
  mutate(Continent = ifelse(COUNTRY %in% picts, "PICs", countrycode(COUNTRY, "iso3c", "continent"))) %>%
  ungroup() %>%
  arrange(COUNTRY, TimePeriod) %>%
  group_by(Continent) %>%
  mutate(Value_continent = mean(Value)) %>%
  ungroup() %>%
  group_by(Continent, TimePeriod) %>%
  mutate(Value_continent_annual_sdg2 = mean(Value)) %>%
  ungroup() %>%
  rename(Year = TimePeriod, Node = Continent) %>%
  select(Node, Year, Value_continent_annual_sdg2)

# sdg 3
sdg3.1 <- sdg3_og %>%
  filter(SeriesDescription == "Mortality rate attributed to cardiovascular disease, cancer, diabetes or chronic respiratory disease (probability)",
         TimePeriod >= 2000 & TimePeriod <= 2020) %>%
  select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
  mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
         Value = as.numeric(Value)) %>%
  filter(COUNTRY %in% country_list) %>%
  filter(!is.na(Value)) %>%
  mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value))) %>%
  arrange(COUNTRY, TimePeriod) %>%  # ensure the data is ordered by COUNTRY and TimePeriod
  group_by(COUNTRY, TimePeriod) %>%
  mutate(Value = mean(Value),
         Unit = "Probability",
         SeriesDescription = "Non-communicable disease mortality (probability)") %>%
  distinct() %>%
  mutate(Continent = ifelse(COUNTRY %in% picts, "PICs", countrycode(COUNTRY, "iso3c", "continent"))) %>%
  ungroup() %>%
  arrange(COUNTRY, TimePeriod) %>%
  group_by(Continent) %>%
  mutate(Value_continent = mean(Value)) %>%
  ungroup() %>%
  group_by(Continent, TimePeriod) %>%
  mutate(Value_continent_annual_sdg3 = mean(Value)) %>%
  ungroup() %>%
  rename(Year = TimePeriod, Node = Continent) %>%
  select(Node, Year, Value_continent_annual_sdg3) %>%
  distinct()

# combine all SDGs
sdg_full <- left_join(sdg2.1, sdg3.1, relationship = "many-to-many") %>% distinct()

#### [2] combine all data ####
merged_df_net <- left_join(net_met_df, oni_annual, by = "Year")
merged_df_node <- left_join(node_met_df, oni_annual, by = "Year") %>%
  left_join(sdg_full, by = c("Year", "Node"))
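## optional join check (sketch): how many node-year rows actually picked up SDG values?
# merged_df_node %>%
#   summarise(n = n(),
#             with_sdg2 = sum(!is.na(Value_continent_annual_sdg2)),
#             with_sdg3 = sum(!is.na(Value_continent_annual_sdg3)))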

#### [3] analysis time - network-wide metrics vs. other variables ####
## exploratory plots
ggplot(merged_df_net, aes(Year, Mean_oni)) + geom_line()
ggplot(merged_df_net, aes(Year, modularity)) + geom_line()

## correlations
cor(merged_df_net$modularity, merged_df_net$Var_oni)

## linear regression
model <- lm(modularity ~ Mean_oni, data = merged_df_net)
summary(model)

## multiple linear regression
model <- lm(modularity ~ Mean_oni + Max_oni + Var_oni, data = merged_df_net)
summary(model)
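## optional diagnostics (sketch): standard residual plots for the regression above
# par(mfrow = c(2, 2)); plot(model); par(mfrow = c(1, 1))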

#### [4] analysis time - node-specific metrics vs. other variables ####
## correlation
cor(merged_df_node$strength_diff, merged_df_node$Value_continent_annual_sdg2, use = "complete.obs")

## mixed effects
library(lme4)
model_mixed <- lmer(strength_diff ~ Mean_oni + (1 | Node) + (1 | Year), data = merged_df_node)
summary(model_mixed)

## fixed effects
library(plm)
model_fixed <- plm(strength_diff ~ Mean_oni, data = merged_df_node, index = c("Node", "Year"), model = "within")
summary(model_fixed)

## random effects
model_random <- plm(strength_diff ~ Value_continent_annual_sdg2,
                    data = merged_df_node,
                    index = c("Node", "Year"), model = "random")
summary(model_random)
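## optional (sketch): a Hausman test to help choose between fixed and random effects;
## both models must share the same formula, so refit the random-effects model on Mean_oni first
# model_random_oni <- plm(strength_diff ~ Mean_oni, data = merged_df_node,
#                         index = c("Node", "Year"), model = "random")
# phtest(model_fixed, model_random_oni)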

## GAMs
library(mgcv)
model_gam <- gam(Value_continent_annual_sdg2 ~ s(strength_diff),
                 data = merged_df_node)
summary(model_gam)
plot(model_gam)

# gam w/ nodes and year
model_gam2 <- gam(Value_continent_annual_sdg2 ~ s(strength_diff) + s(Year) + Node,
                  data = merged_df_node)
summary(model_gam2)
plot(model_gam2)

# gam w/ a strength_diff-by-Year interaction smooth
model_gam3 <- gam(Value_continent_annual_sdg2 ~ s(strength_diff, Year) + s(Year) + Node,
                  data = merged_df_node)
summary(model_gam3)
plot(model_gam3)

# Plot the smooth term for the strength_diff and Year interaction
plot(model_gam3, select = 1, rug = TRUE, shade = TRUE, main = "Interaction: strength_diff and Year")

# Plot the smooth term for Year
plot(model_gam3, select = 2, rug = TRUE, shade = TRUE, main = "Smooth Effect: Year")

# Plot the parametric Node effects (Node is not a smooth term, so include all terms)
plot(model_gam3, all.terms = TRUE, rug = TRUE, shade = TRUE)
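## optional model checks (sketch): basis-dimension diagnostics and AIC comparison across the GAMs
# gam.check(model_gam3)
# AIC(model_gam, model_gam2, model_gam3)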
@@ -0,0 +1,63 @@
#### [0] prep ####
library(tidyverse)
library(countrycode)

# define the PICTs to filter for
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")

#### [1] pull consumption data ####
# define the path to your data folder and the file pattern to match
data_path <- "data/consumption/"
file_pattern <- "consumption_midpoint_HS07_\\d{4}\\.csv"

# list all files in the folder that match the pattern for all years
file_list <- list.files(path = data_path, pattern = file_pattern, full.names = TRUE)
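## optional check (sketch): confirm the expected annual files were found before reading them
# length(file_list)
# stopifnot(length(file_list) > 0)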

# define a function to read and filter each file
load_and_filter <- function(file) {
  read_csv(file) %>%
    filter(
      # consumer_iso3c %in% picts |
      # exporter_iso3c %in% picts |
      # source_country_iso3c %in% picts,
      habitat == "marine",
      method == "capture"
    ) %>%
    mutate(
      source_country_short = if_else(
        str_length(countrycode(source_country_iso3c, "iso3c", "country.name")) > 6,
        source_country_iso3c,
        countrycode(source_country_iso3c, "iso3c", "country.name")
      ),
      exporter_country_short = if_else(
        str_length(countrycode(exporter_iso3c, "iso3c", "country.name")) > 6,
        exporter_iso3c,
        countrycode(exporter_iso3c, "iso3c", "country.name")
      ),
      consumer_country_short = if_else(
        str_length(countrycode(consumer_iso3c, "iso3c", "country.name")) > 6,
        consumer_iso3c,
        countrycode(consumer_iso3c, "iso3c", "country.name")
      ),
      source_country = countrycode(source_country_iso3c, "iso3c", "country.name"),
      exporter_country = countrycode(exporter_iso3c, "iso3c", "country.name"),
      consumer_country = countrycode(consumer_iso3c, "iso3c", "country.name")
    )
}

# combine into a single dataframe
combined_data <- bind_rows(lapply(file_list, load_and_filter))
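## equivalent alternative (sketch): purrr ships with the tidyverse, so the same step can be written as
# combined_data <- purrr::map_dfr(file_list, load_and_filter)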

## save it - annual
# write.csv(combined_data, paste0("output/consumption_annual_", unique(combined_data$hs_version), ".csv"), row.names = FALSE)

## save it - years aggregated
consumption_allyears <- combined_data %>%
  group_by(source_country_iso3c, exporter_iso3c, consumer_iso3c) %>%
  summarise(total_weight = sum(consumption_live_t))
# write.csv(consumption_allyears, paste0("output/consumption_allyears_", unique(combined_data$hs_version), ".csv"), row.names = FALSE)