big update; rearranged + network analysis progress
cake-oh committed Oct 31, 2024
1 parent b20d1b1 commit 9b15e24
Showing 61 changed files with 888,153 additions and 247,440 deletions.
Binary file modified .DS_Store
Binary file not shown.
171 changes: 171 additions & 0 deletions code/10_temporalnetworks.R
@@ -0,0 +1,171 @@
#### goal: use edge lists to calculate annual network metrics ####
## 'regular networks' and multilayer networks

#### [0] prep ####
library(tidyverse)
library(tidytext)
library(igraph)
library(ggraph)

edge_df_pics <- read_csv("output/edge_consumption_pics_nutrients_annual.csv")

#### [1] prep data ####
nut_df <- edge_df_pics %>% filter(Nutrient == "Protein") %>%
select(SourceContinent,ExporterContinent,ConsumerContinent,year,Nutrient,rni_amount) %>%
mutate(ExporterContinent=str_replace(ExporterContinent,"Domestic PICs","PICs"))


#### [2] regular network analysis ####
## build annual networks and calculate metrics (looped below; a reusable helper sketch follows the loop)
network_metrics <- data.frame()
node_metrics <- data.frame()

# Loop through each year in the dataset
for (yr in unique(nut_df$year)) {

#### filter data for the specific year ####
annual_data <- nut_df %>% filter(year == yr)

#### create edges from the filtered data ####
edges <- rbind(
data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
)
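# note: repeated from-to pairs become parallel edges in the graph below;
# an optional pre-aggregation (sketch only, not applied here) would sum the weights per pair:
# edges <- edges %>% group_by(from, to) %>% summarise(weight = sum(weight), .groups = "drop")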

#### create a directed graph with weighted edges for this year ####
g <- graph_from_data_frame(edges, directed = TRUE)

#### calculate network-wide metrics ####
degree_in <- mean(degree(g, mode = "in")) # Mean in-degree
degree_out <- mean(degree(g, mode = "out")) # Mean out-degree
betweenness <- mean(betweenness(g, directed = TRUE)) # Mean betweenness
closeness <- mean(closeness(g, mode = "all")) # Mean closeness
density <- edge_density(g) # Network density
avg_path_length <- mean_distance(g, directed = TRUE) # Average path length
modularity_value <- modularity(cluster_fast_greedy(as.undirected(g))) # Modularity (fast-greedy communities)

# Store network-wide metrics in the dataframe
network_metrics <- rbind(network_metrics, data.frame(
Year = yr,
degree_in = degree_in,
degree_out = degree_out,
betweenness = betweenness,
closeness = closeness,
density = density,
avg_path_length = avg_path_length,
modularity = modularity_value
))

#### calculate node-specific metrics ####
node_degree_in <- degree(g, mode = "in") # Node in-degree
node_degree_out <- degree(g, mode = "out") # Node out-degree
node_betweenness <- betweenness(g, directed = TRUE) # Node betweenness
node_closeness <- closeness(g, mode = "all") # Node closeness
node_strength_in <- strength(g, mode = "in", weights = E(g)$weight) # In-strength
node_strength_out <- strength(g, mode = "out", weights = E(g)$weight) # Out-strength
node_strength_diff <- node_strength_in - node_strength_out

# Store node-specific metrics in the dataframe
node_metrics <- rbind(node_metrics, data.frame(
Year = yr,
Node = V(g)$name,
degree_in = node_degree_in,
degree_out = node_degree_out,
betweenness = node_betweenness,
closeness = node_closeness,
strength_in = node_strength_in,
strength_out = node_strength_out,
strength_diff = node_strength_diff
))
}
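
## sketch of the helper mentioned above (an assumption, not part of the original workflow):
## a hypothetical build_annual_graph() that both the regular and multilayer loops could reuse
build_annual_graph <- function(df, yr) {
  annual_data <- df %>% filter(year == yr)
  edges <- rbind(
    data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
    data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
  )
  graph_from_data_frame(edges, directed = TRUE)
}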

#### save outputs ####
# write.csv(network_metrics,"output/networkmetrics_annual_pics.csv",row.names = FALSE)
# write.csv(node_metrics,"output/nodemetrics_annual_pics.csv",row.names = FALSE)

#### explore network metric results ####
## initially explored: avg_path_length and modularity (their similar trends make sense since the two are correlated)
# ggplot(network_metrics,aes(Year,modularity)) + geom_line()
# summary(lm(modularity ~ Year, data = network_metrics))
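# quick check of that correlation (sketch; assumes network_metrics is still in memory)
# cor(network_metrics$avg_path_length, network_metrics$modularity, use = "complete.obs")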

#### explore node metric results ####
ggplot(node_metrics,aes(Year,strength_diff,colour = Node,group=Node)) + geom_line() +
ggtitle(paste0(unique(nut_df$Nutrient)))




#### [3] multilayer network analysis ####
network_metrics_ml <- data.frame()
node_metrics_ml <- data.frame()

# Loop through each year in the dataset
for (yr in unique(nut_df$year)) {

#### filter data for the specific year ####
annual_data <- nut_df %>% filter(year == yr)

#### append explicit role names ####
annual_data$SourceContinent <- paste0(annual_data$SourceContinent, "_Source")
annual_data$ExporterContinent <- paste0(annual_data$ExporterContinent, "_Exporter")
annual_data$ConsumerContinent <- paste0(annual_data$ConsumerContinent, "_Consumer")
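# e.g. "Asia" becomes "Asia_Source", "Asia_Exporter", or "Asia_Consumer",
# so each role forms its own layer of nodes in the combined graph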

#### create edges for Source -> Exporter and Exporter -> Consumer ####
edges <- rbind(
data.frame(from = annual_data$SourceContinent, to = annual_data$ExporterContinent, weight = annual_data$rni_amount),
data.frame(from = annual_data$ExporterContinent, to = annual_data$ConsumerContinent, weight = annual_data$rni_amount)
)

#### create a directed graph with weighted edges for this year ####
g <- graph_from_data_frame(edges, directed = TRUE)

#### calculate network-wide metrics ####
degree_in <- mean(degree(g, mode = "in")) # Mean in-degree
degree_out <- mean(degree(g, mode = "out")) # Mean out-degree
betweenness <- mean(betweenness(g, directed = TRUE)) # Mean betweenness
closeness <- mean(closeness(g, mode = "all")) # Mean closeness
density <- edge_density(g) # Network density
avg_path_length <- mean_distance(g, directed = TRUE) # Average path length
modularity_value <- modularity(cluster_fast_greedy(as.undirected(g))) # Modularity (fast-greedy communities)

# store network-wide metrics in the dataframe
network_metrics_ml <- rbind(network_metrics_ml, data.frame(
Year = yr,
degree_in = degree_in,
degree_out = degree_out,
betweenness = betweenness,
closeness = closeness,
density = density,
avg_path_length = avg_path_length,
modularity = modularity_value
))

#### calculate node-specific metrics ####
node_degree_in <- degree(g, mode = "in") # Node in-degree
node_degree_out <- degree(g, mode = "out") # Node out-degree
node_betweenness <- betweenness(g, directed = TRUE) # Node betweenness
node_closeness <- closeness(g, mode = "all") # Node closeness
node_strength_in <- strength(g, mode = "in", weights = E(g)$weight) # In-strength
node_strength_out <- strength(g, mode = "out", weights = E(g)$weight) # Out-strength
node_strength_diff <- node_strength_in - node_strength_out # Strength difference

# store node-specific metrics in the dataframe
node_metrics_ml <- rbind(node_metrics_ml, data.frame(
Year = yr,
Node = V(g)$name,
degree_in = node_degree_in,
degree_out = node_degree_out,
betweenness = node_betweenness,
closeness = node_closeness,
strength_in = node_strength_in,
strength_out = node_strength_out,
strength_diff = node_strength_diff
))
}



#### save outputs ####
# write.csv(network_metrics_ml,"output/networkmetrics_annual_pics_ml.csv",row.names = FALSE)
# write.csv(node_metrics_ml,"output/nodemetrics_annual_pics_ml.csv",row.names = FALSE)

163 changes: 163 additions & 0 deletions code/11_temporalanalysis.R
@@ -0,0 +1,163 @@
#### goal: assess relationships between network metrics and relevant variables ####

#### [0] prep ####
library(tidyverse)
library(countrycode)

# load network data
net_met_df <- read_csv("output/networkmetrics_annual_pics.csv")
node_met_df <- read_csv("output/nodemetrics_annual_pics.csv")

# create variables
country_consumption <- read_csv("output/edge_consumption_countries_nutrients.csv")
country_list <- unique(c(country_consumption$SourceCountry,country_consumption$ExporterCountry,country_consumption$ConsumerCountry))
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")

# load variable data - ONI, subsidies, SDGs
oni_og <- read_csv("data/oni.csv")
subsidies_og <- read_csv("data/fish_subsidies/Sumaila_dataset.csv")
sdg2_og <- read_csv("data/Goal2.csv")
sdg3_og <- read_csv("data/Goal3.csv")
treaties_og <- readxl::read_xlsx("data/fapda_details-202410301647.xlsx") # from fao https://fapda.apps.fao.org/fapda/#main.html
psma_og <- read_csv("data/psma_parties.csv") # from fao https://www.fao.org/port-state-measures/background/parties-psma/en/


#### [1] process external variables ####
## align timesteps and calculate ONI summary stats
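# DJF:NDJ are the twelve overlapping 3-month ONI seasons, so the row-wise stats below summarise each calendar year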
oni_annual <- oni_og %>%
rowwise() %>%
mutate(Mean_oni = mean(c_across(DJF:NDJ), na.rm = TRUE),
Var_oni = var(c_across(DJF:NDJ), na.rm=TRUE),
Median_oni = median(c_across(DJF:NDJ), na.rm=TRUE),
Min_oni = min(c_across(DJF:NDJ), na.rm = TRUE),
Max_oni = max(c_across(DJF:NDJ), na.rm = TRUE),
Range_oni = Max_oni - Min_oni) %>%
filter(Year >= min(net_met_df$Year) & Year <= max(net_met_df$Year)) %>% select(-c(DJF:NDJ))

## process SDGs
# sdg 2
sdg2.1 <- sdg2_og %>%
filter(SeriesDescription == "Prevalence of undernourishment (%)" & TimePeriod >= 2000 & TimePeriod <= 2020) %>%
select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
Value = as.numeric(Value)) %>%
filter(COUNTRY %in% country_list) %>%
filter(!is.na(Value)) %>%
mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value)),
Unit="Percentage",SeriesDescription="Prevalence of undernourishment (percentage)") %>% ungroup() %>%
group_by(COUNTRY,TimePeriod) %>%
mutate(Continent = ifelse(COUNTRY %in% picts, "PICs", countrycode(COUNTRY, "iso3c", "continent"))) %>% ungroup() %>%
arrange(COUNTRY, TimePeriod) %>%
group_by(Continent) %>%
mutate(Value_continent=mean(Value)) %>% ungroup() %>%
group_by(Continent,TimePeriod) %>%
mutate(Value_continent_annual_sdg2=mean(Value)) %>% ungroup() %>%
rename(Year=TimePeriod,Node=Continent) %>%
select(Node,Year,Value_continent_annual_sdg2)

# sdg 3
sdg3.1 <- sdg3_og %>%
filter(SeriesDescription == "Mortality rate attributed to cardiovascular disease, cancer, diabetes or chronic respiratory disease (probability)",
TimePeriod >= 2000 & TimePeriod <= 2020) %>%
select(Goal, Target, Indicator, SeriesDescription, GeoAreaName, TimePeriod, Value) %>%
mutate(COUNTRY = countrycode::countrycode(GeoAreaName, "country.name", "iso3c"),
Value = as.numeric(Value)) %>%
filter(COUNTRY %in% country_list) %>%
filter(!is.na(Value)) %>%
mutate(Value = ifelse(Value < 1, 1, ifelse(Value < 2.5, 2.5, Value))) %>%
arrange(COUNTRY, TimePeriod) %>% # Ensure the data is ordered by COUNTRY and TimePeriod
group_by(COUNTRY,TimePeriod) %>%
mutate(Value = mean(Value),
Unit="Probability",
SeriesDescription="Non-communicable disease mortality (probability)") %>% distinct() %>%
mutate(Continent = ifelse(COUNTRY %in% picts, "PICs", countrycode(COUNTRY, "iso3c", "continent"))) %>% ungroup() %>%
arrange(COUNTRY, TimePeriod) %>%
group_by(Continent) %>%
mutate(Value_continent=mean(Value)) %>% ungroup() %>%
group_by(Continent,TimePeriod) %>%
mutate(Value_continent_annual_sdg3=mean(Value)) %>% ungroup() %>%
rename(Year=TimePeriod,Node=Continent) %>%
select(Node,Year,Value_continent_annual_sdg3) %>% distinct()

# combine all SDGs
sdg_full <- left_join(sdg2.1,sdg3.1,relationship = "many-to-many") %>% distinct()





#### combine all data ####
merged_df_net <- left_join(net_met_df,oni_annual,by="Year")
merged_df_node <- left_join(node_met_df,oni_annual,by="Year") %>%
left_join(sdg_full,by=c("Year","Node"))


#### [3] analysis time - network-wide metrics vs. other variables ####
## exploratory plots
ggplot(merged_df_net,aes(Year,Mean_oni)) + geom_line()
ggplot(merged_df_net,aes(Year,modularity)) + geom_line()


## correlations
cor(merged_df_net$modularity, merged_df_net$Var_oni, use = "complete.obs")

## linear regression
model <- lm(modularity ~ Mean_oni, data = merged_df_net)
summary(model)

## multiple linear regressions
model <- lm(modularity ~ Mean_oni + Max_oni + Var_oni, data = merged_df_net)
summary(model)



#### [4] analysis time - node-specific metrics vs. other variables ####
## correlation
cor(merged_df_node$strength_diff,merged_df_node$Value_continent_annual_sdg2,use="complete.obs")


## mixed effects (strength_diff)
library(lme4)
model_mixed <- lmer(strength_diff ~ Mean_oni + (1 | Node) + (1 | Year), data = merged_df_node)
summary(model_mixed)

## fixed effects
library(plm)
model_fixed <- plm(strength_diff ~ Mean_oni, data = merged_df_node, index = c("Node", "Year"), model = "within")
summary(model_fixed)

## random effects
model_random <- plm(strength_diff ~ Value_continent_annual_sdg2,
data = merged_df_node,
index = c("Node", "Year"), model = "random")
summary(model_random)
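## possible follow-up (sketch, not run here): a Hausman test comparing fixed vs. random effects
## requires the two models to share a formula, e.g. refit the random-effects model on Mean_oni
# model_random_oni <- plm(strength_diff ~ Mean_oni, data = merged_df_node,
#                         index = c("Node", "Year"), model = "random")
# phtest(model_fixed, model_random_oni)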

## GAMs
library(mgcv)
model_gam <- gam(Value_continent_annual_sdg2 ~ s(strength_diff),
data = merged_df_node)
summary(model_gam)
plot(model_gam)
# gam w/ nodes and year
model_gam2 <- gam(Value_continent_annual_sdg2 ~ s(strength_diff) + s(Year) + Node,
data = merged_df_node)
summary(model_gam2)
plot(model_gam2)

model_gam3 <- gam(Value_continent_annual_sdg2 ~ s(strength_diff,Year) + s(Year) + Node,
data = merged_df_node)
summary(model_gam3)
plot(model_gam3)

# Plot the smooth term for strength_diff and Year interaction
plot(model_gam3, select = 1, rug = TRUE, shade = TRUE, main = "Interaction: strength_diff and Year")

# Plot the smooth term for Year
plot(model_gam3, select = 2, rug = TRUE, shade = TRUE, main = "Smooth Effect: Year")

# Plot the Node term (a parametric factor, so it only appears with all.terms = TRUE)
plot(model_gam3, all.terms = TRUE, rug = TRUE, shade = TRUE)




63 changes: 63 additions & 0 deletions code/1a_grab_consumption.R
@@ -0,0 +1,63 @@
#### [0] prep ####
library(tidyverse)
library(countrycode)

# define the PICTs to filter for
picts <- c("FJI", "PNG", "SLB", "VUT", "FSM", "PLW", "MHL", "KIR", "NRU", "WSM", "TON", "TUV")

#### [1] pull consumption data ####
# define the path to your data folder and the file pattern to match
data_path <- "data/consumption/"
file_pattern <- "consumption_midpoint_HS07_\\d{4}\\.csv"
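# e.g. matches files like "consumption_midpoint_HS07_2015.csv" (any four-digit year)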

# list all files in the folder that match the pattern for all years
file_list <- list.files(path = data_path, pattern = file_pattern, full.names = TRUE)


# define a function to read and filter each file
load_and_filter <- function(file) {
read_csv(file) %>%
filter(
# consumer_iso3c %in% picts |
# exporter_iso3c %in% picts |
# source_country_iso3c %in% picts,
habitat == "marine",
method == "capture"
) %>%
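# the *_short columns below fall back to ISO3 codes for country names longer than 6 characters (presumably to keep labels compact)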
mutate(
source_country_short = if_else(
str_length(countrycode(source_country_iso3c, "iso3c", "country.name")) > 6,
source_country_iso3c,
countrycode(source_country_iso3c, "iso3c", "country.name")
),
exporter_country_short = if_else(
str_length(countrycode(exporter_iso3c, "iso3c", "country.name")) > 6,
exporter_iso3c,
countrycode(exporter_iso3c, "iso3c", "country.name")
),
consumer_country_short = if_else(
str_length(countrycode(consumer_iso3c, "iso3c", "country.name")) > 6,
consumer_iso3c,
countrycode(consumer_iso3c, "iso3c", "country.name")
),
source_country = countrycode(source_country_iso3c, "iso3c", "country.name"),
exporter_country = countrycode(exporter_iso3c, "iso3c", "country.name"),
consumer_country = countrycode(consumer_iso3c, "iso3c", "country.name")
)
}

# combine into a single dataframe
combined_data <- bind_rows(lapply(file_list, load_and_filter))

## save it - annual
# write.csv(combined_data,paste0("output/consumption_annual_",unique(combined_data$hs_version), ".csv"),row.names = FALSE)

## save it - years aggregated
consumption_allyears <- group_by(combined_data,
source_country_iso3c,exporter_iso3c,consumer_iso3c) %>%
summarise(total_weight = sum(consumption_live_t))
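# note: summarise() drops only the last grouping level by default; add .groups = "drop" for a fully ungrouped result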
# write.csv(consumption_allyears,paste0("output/consumption_allyears_",unique(combined_data$hs_version),".csv"),row.names = FALSE)

