diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e57b1bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata +MultilayerExtraction.Rproj diff --git a/DESCRIPTION b/DESCRIPTION index 9de5ec1..852aae9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,8 +5,10 @@ Author: James D. Wilson [aut, cre] Maintainer: James D. Wilson Description: An R package to identify and describe strongly connected vertex-layer communities in multilayer networks. Depends: - R (>= 3.2.2) + R (>= 3.2.2), igraph, foreach, doParallel License: GPL (>= 2) Encoding: UTF-8 LazyData: TRUE RoxygenNote: 6.0.1 +Suggests: + testthat diff --git a/MultilayerExtraction.Rproj b/MultilayerExtraction.Rproj index d848a9f..cba1b6b 100644 --- a/MultilayerExtraction.Rproj +++ b/MultilayerExtraction.Rproj @@ -5,8 +5,13 @@ SaveWorkspace: No AlwaysSaveHistory: Default EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 Encoding: UTF-8 +RnwWeave: Sweave +LaTeX: pdfLaTeX + AutoAppendNewline: Yes StripTrailingWhitespace: Yes diff --git a/R/multilayer_extraction.R b/R/multilayer_extraction.R index d101264..0753eb1 100644 --- a/R/multilayer_extraction.R +++ b/R/multilayer_extraction.R @@ -1,9 +1,9 @@ #' multilayer.extraction -#' +#' #'Function that identifies statistically significant vertex-layer communities in multilayer networks. -#' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. +#' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. #' @param seed: seed for reproducibility. The initial neighborhoods that act as seeds for the multilayer extraction algorithm -#' are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123. +#' are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123. #' @param min.score: the minimum score allowable for an extracted community. Default is 0. #' @param prop.sample: the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 #' specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer. @@ -12,59 +12,59 @@ #' @return A MultilayerCommunity object, which is a list containing the following objects #' \itemize{ #' \item Community.List: a list of vertex-layer communities extracted from the algorithm -#' \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of -#' each community, and includes for each level of overlap parameter Beta the mean score, and the total number -#' of communities. This is used for determining the overall number of communities in a multilayer network. +#' \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of +#' each community, and includes for each level of overlap parameter Beta the mean score, and the total number +#' of communities. This is used for determining the overall number of communities in a multilayer network. #' } #'@references #'\itemize{ -#' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based +#' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based #' extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research -#' } +#' } #' @author James D. Wilson -#' @export -#' -#' +#' @export +#' +#' multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.sample = 0.05, directed = c(FALSE, TRUE)){ #adjacency should be an edgelist with three columns - node1, node2, layer #layer should be numbered with integers #each network has the same number of nodes #nodes are indexed by the integers starting from 1 - + m <- max(adjacency[, 3]) #max of the layer index n <- length(unique(c(adjacency[, 1], adjacency[, 2]))) directed <- directed[1] #Calculate the modularity matrix print(paste("Estimation Stage")) - - mod.matrix <- multilayer.modularity.matrix(adjacency) - + + mod.matrix <- multilayer.modularity.matrix(adjacency) + #Initialize the communities print(paste("Initialization Stage")) - + for(i in 1:m){ if(i == 1){ - graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], + graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], directed = directed) initial.set <- initialization(graph, prop.sample, m, n) }else{ - graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], + graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], directed = directed) initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) } } - + #Search Across Initial sets print(paste("Search Stage")) - + cat(paste("Searching over", length(initial.set[[1]]), "seed sets \n")) Results.temp <- list() K <- length(initial.set[[1]]) - + #detectCores detects the number of cores available on your instance - - registerDoParallel(detectCores()) - Results.temp <- foreach(i=1:K,.packages="MultilayerExtraction") %dopar% { + + registerDoParallel(detectCores()) + Results.temp <- foreach(i=1:K,.packages=c("MultilayerExtraction", "igraph")) %dopar% { starter <- list() starter$vertex.set <- as.numeric(initial.set$vertex.set[[i]]) #if the initial neighborhood is of length 1, add a random vertex @@ -73,22 +73,22 @@ multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.samp } starter$layer.set <- as.numeric(initial.set$layer.set[[i]]) single.swap(starter, adjacency, mod.matrix, m, n) - } - + } + #Cleanup the results: Keep the unique communities print(paste("Cleaning Stage")) - + if(length(Results.temp) < 1){return("No Community Found")} - + Scores = rep(0, length(Results.temp)) - + for(i in 1:length(Results.temp)){ if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} if(length(Results.temp[[i]]$B) > 0){ Scores[i] = Results.temp[[i]]$Score } } - + Scores = round(Scores, 5) #keep only unique communities with score greater than threshold indx = which(!duplicated(Scores) == TRUE) @@ -110,7 +110,7 @@ multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.samp Number.Communities[i] = length(temp$Communities) } } - + Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities) Object = list(Community.List = Results3, Diagnostics = Z) class(Object) = "MultilayerCommunity" @@ -135,7 +135,7 @@ swap.candidate = function(set, changes, add, remove, score.old){ } } } - + #If there are only some to removed if(length(add) == 0 & length(remove) > 0){ if(is.na(remove) == FALSE){ @@ -150,7 +150,7 @@ swap.candidate = function(set, changes, add, remove, score.old){ } } } - + #If there are some to removed and some to be added if(length(add) > 0 & length(remove) > 0){ if(changes[remove] < 0 & changes[add] < 0){ @@ -168,7 +168,7 @@ swap.candidate = function(set, changes, add, remove, score.old){ return(list(set.new = set.new, score.old = score.old)) } } - + #if there are none to be added nor removed if(length(add) == 0 & length(remove) == 0){ return(list(set.new = set, score.old = score.old)) @@ -182,91 +182,91 @@ swap.layer = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m print('No Community Found') return(NULL) } - + changes <- layer.change(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n) changes[which(is.null(changes) == TRUE)] <- 0 changes[which(is.na(changes) == TRUE)] <- 0 - + outside.candidate <- which.max(changes[setdiff(1:m, layer.set)]) #which layer should we add? l.add <- setdiff(1:m, layer.set)[outside.candidate] - + inside.candidate <- which.max(changes[layer.set]) #which layer should we remove? l.sub <- layer.set[inside.candidate] - + #Make the swap results <- swap.candidate(layer.set, changes, l.add, l.sub, score.old) layer.set.new <- results$set.new score.old <- results$score.old - - return(list(layer.set.new = layer.set.new, score.old = score.old)) + + return(list(layer.set.new = layer.set.new, score.old = score.old)) } ####################################################################### ######Choosing which vertex to swap one at a time###### swap.vertex = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ - + if(length(layer.set) == 0){ print('No Community Found') return(NULL) } - + #swap decision if(length(vertex.set) < 5){ print('No Community Found') return(NULL) } - + if(length(vertex.set) == n){ print('No Community Found') return(NULL) } changes = vertex.change(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n) - + #Get candidates - outside.candidate <- which.max(changes[setdiff(1:n, vertex.set)])[1] + outside.candidate <- which.max(changes[setdiff(1:n, vertex.set)])[1] u.add <- setdiff(1:n, vertex.set)[outside.candidate] - + inside.candidate <- which.max(changes[vertex.set]) u.sub <- vertex.set[inside.candidate] - + #Make the swap results <- swap.candidate(vertex.set, changes, u.add, u.sub, score.old) - return(list(B.new = results$set.new, score.old = results$score.old)) + return(list(B.new = results$set.new, score.old = results$score.old)) } ####################################################################### #Inner function for a single swap inside the function for Multilayer.Extraction #Note: check the names of initial set single.swap = function(initial.set, adjacency, mod.matrix, m, n){ - + #initialize vertex.set and layer.set B.new <- initial.set$vertex.set I.new <- initial.set$layer.set score.old <- score(mod.matrix, vertex.set = B.new, layer.set = I.new, n) - + iterations <- 1 B.fixed <- B.new + 1 I.fixed <- I.new + 1 - + #main loop - while(length(intersect(B.fixed, B.new)) < max(length(B.fixed), length(B.new)) | + while(length(intersect(B.fixed, B.new)) < max(length(B.fixed), length(B.new)) | length(intersect(I.fixed, I.new)) < max(length(I.fixed), length(I.new))){ - + if(length(B.new) < 2 | length(I.new) < 1){ print('No community found') return(NULL) } - + #seems redundant, check... B.fixed <- B.new I.fixed <- I.new - + B <- B.new I <- I.new + 1 - + #update layer set if(m > 1){ while(length(intersect(I.new, I)) < max(length(I.new), length(I))){ - + I <- I.new results <- swap.layer(adjacency, mod.matrix, I, B, score.old, m, n) I.new <- results$layer.set.new @@ -276,15 +276,15 @@ single.swap = function(initial.set, adjacency, mod.matrix, m, n){ if(m == 1){ I.new <- 1 } - + #update vertex set B <- B - 1 B.new <- B + 1 - + while(length(intersect(B.new, B)) < max(length(B.new), length(B))){ B <- B.new results <- swap.vertex(adjacency, mod.matrix, I.new, B, score.old, m, n) - + B.new <- results$B.new score.old <- results$score.old } @@ -294,17 +294,17 @@ single.swap = function(initial.set, adjacency, mod.matrix, m, n){ ######Effect on score when adding or subtracting a layer####### layer.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ - + indx <- setdiff(1:m, layer.set) #which layers are not in the current set score.changes <- rep(0, m) - + for(i in 1:m){ if(i %in% indx){ - score.changes[i] <- score(mod.matrix, vertex.set = + score.changes[i] <- score(mod.matrix, vertex.set = vertex.set, layer.set = union(layer.set, i), n) - score.old } if(i %in% indx == FALSE){ - score.changes[i] <- score(mod.matrix, vertex.set = + score.changes[i] <- score(mod.matrix, vertex.set = vertex.set, layer.set = setdiff(layer.set, i), n) - score.old } } @@ -313,21 +313,21 @@ layer.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, ######Effect on score when adding or subtracting a vertex####### vertex.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ - + indx <- setdiff(1:n, vertex.set) score.changes <- rep(0, n) - + #the following can also be parallelized! for(i in 1:n){ if(i %in% indx){ - score.changes[i] <- score(mod.matrix, vertex.set = + score.changes[i] <- score(mod.matrix, vertex.set = union(vertex.set, i), layer.set = layer.set, n) - score.old } if(i %in% indx == FALSE){ - score.changes[i] <- score(mod.matrix, vertex.set = + score.changes[i] <- score(mod.matrix, vertex.set = setdiff(vertex.set, i), layer.set = layer.set, n) - score.old } } - + return(score.changes) } diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..95be03b --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(MultilayerExtraction) + +test_check("MultilayerExtraction") diff --git a/tests/testthat/test-multilayer_extraction.R b/tests/testthat/test-multilayer_extraction.R new file mode 100644 index 0000000..717d069 --- /dev/null +++ b/tests/testthat/test-multilayer_extraction.R @@ -0,0 +1,7 @@ +context("test-multilayer_extraction.R") + +test_that("multiplication works", { + data("AU_CS") + network <- adjacency.to.edgelist(AU_CS) + community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) +})