Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
MultilayerExtraction.Rproj
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ Author: James D. Wilson [aut, cre]
Maintainer: James D. Wilson <jdwilson4@usfca.edu>
Description: An R package to identify and describe strongly connected vertex-layer communities in multilayer networks.
Depends:
R (>= 3.2.2)
R (>= 3.2.2), igraph, foreach, doParallel
License: GPL (>= 2)
Encoding: UTF-8
LazyData: TRUE
RoxygenNote: 6.0.1
Suggests:
testthat
5 changes: 5 additions & 0 deletions MultilayerExtraction.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

Expand Down
138 changes: 69 additions & 69 deletions R/multilayer_extraction.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' multilayer.extraction
#'
#'
#'Function that identifies statistically significant vertex-layer communities in multilayer networks.
#' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.
#' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.
#' @param seed: seed for reproducibility. The initial neighborhoods that act as seeds for the multilayer extraction algorithm
#' are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123.
#' are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123.
#' @param min.score: the minimum score allowable for an extracted community. Default is 0.
#' @param prop.sample: the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05
#' specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer.
Expand All @@ -12,59 +12,59 @@
#' @return A MultilayerCommunity object, which is a list containing the following objects
#' \itemize{
#' \item Community.List: a list of vertex-layer communities extracted from the algorithm
#' \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of
#' each community, and includes for each level of overlap parameter Beta the mean score, and the total number
#' of communities. This is used for determining the overall number of communities in a multilayer network.
#' \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of
#' each community, and includes for each level of overlap parameter Beta the mean score, and the total number
#' of communities. This is used for determining the overall number of communities in a multilayer network.
#' }
#'@references
#'\itemize{
#' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based
#' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based
#' extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research
#' }
#' }
#' @author James D. Wilson
#' @export
#'
#'
#' @export
#'
#'
multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.sample = 0.05, directed = c(FALSE, TRUE)){
#adjacency should be an edgelist with three columns - node1, node2, layer
#layer should be numbered with integers
#each network has the same number of nodes
#nodes are indexed by the integers starting from 1

m <- max(adjacency[, 3]) #max of the layer index
n <- length(unique(c(adjacency[, 1], adjacency[, 2])))
directed <- directed[1]
#Calculate the modularity matrix
print(paste("Estimation Stage"))
mod.matrix <- multilayer.modularity.matrix(adjacency)

mod.matrix <- multilayer.modularity.matrix(adjacency)

#Initialize the communities
print(paste("Initialization Stage"))

for(i in 1:m){
if(i == 1){
graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2],
graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2],
directed = directed)
initial.set <- initialization(graph, prop.sample, m, n)
}else{
graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2],
graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2],
directed = directed)
initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n))
}
}

#Search Across Initial sets
print(paste("Search Stage"))

cat(paste("Searching over", length(initial.set[[1]]), "seed sets \n"))
Results.temp <- list()
K <- length(initial.set[[1]])

#detectCores detects the number of cores available on your instance
registerDoParallel(detectCores())
Results.temp <- foreach(i=1:K,.packages="MultilayerExtraction") %dopar% {

registerDoParallel(detectCores())
Results.temp <- foreach(i=1:K,.packages=c("MultilayerExtraction", "igraph")) %dopar% {
starter <- list()
starter$vertex.set <- as.numeric(initial.set$vertex.set[[i]])
#if the initial neighborhood is of length 1, add a random vertex
Expand All @@ -73,22 +73,22 @@ multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.samp
}
starter$layer.set <- as.numeric(initial.set$layer.set[[i]])
single.swap(starter, adjacency, mod.matrix, m, n)
}
}

#Cleanup the results: Keep the unique communities
print(paste("Cleaning Stage"))

if(length(Results.temp) < 1){return("No Community Found")}

Scores = rep(0, length(Results.temp))

for(i in 1:length(Results.temp)){
if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000}
if(length(Results.temp[[i]]$B) > 0){
Scores[i] = Results.temp[[i]]$Score
}
}

Scores = round(Scores, 5)
#keep only unique communities with score greater than threshold
indx = which(!duplicated(Scores) == TRUE)
Expand All @@ -110,7 +110,7 @@ multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.samp
Number.Communities[i] = length(temp$Communities)
}
}

Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities)
Object = list(Community.List = Results3, Diagnostics = Z)
class(Object) = "MultilayerCommunity"
Expand All @@ -135,7 +135,7 @@ swap.candidate = function(set, changes, add, remove, score.old){
}
}
}

#If there are only some to removed
if(length(add) == 0 & length(remove) > 0){
if(is.na(remove) == FALSE){
Expand All @@ -150,7 +150,7 @@ swap.candidate = function(set, changes, add, remove, score.old){
}
}
}

#If there are some to removed and some to be added
if(length(add) > 0 & length(remove) > 0){
if(changes[remove] < 0 & changes[add] < 0){
Expand All @@ -168,7 +168,7 @@ swap.candidate = function(set, changes, add, remove, score.old){
return(list(set.new = set.new, score.old = score.old))
}
}

#if there are none to be added nor removed
if(length(add) == 0 & length(remove) == 0){
return(list(set.new = set, score.old = score.old))
Expand All @@ -182,91 +182,91 @@ swap.layer = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m
print('No Community Found')
return(NULL)
}

changes <- layer.change(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n)
changes[which(is.null(changes) == TRUE)] <- 0
changes[which(is.na(changes) == TRUE)] <- 0

outside.candidate <- which.max(changes[setdiff(1:m, layer.set)]) #which layer should we add?
l.add <- setdiff(1:m, layer.set)[outside.candidate]

inside.candidate <- which.max(changes[layer.set]) #which layer should we remove?
l.sub <- layer.set[inside.candidate]

#Make the swap
results <- swap.candidate(layer.set, changes, l.add, l.sub, score.old)
layer.set.new <- results$set.new
score.old <- results$score.old
return(list(layer.set.new = layer.set.new, score.old = score.old))

return(list(layer.set.new = layer.set.new, score.old = score.old))
}

#######################################################################
######Choosing which vertex to swap one at a time######
swap.vertex = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){

if(length(layer.set) == 0){
print('No Community Found')
return(NULL)
}

#swap decision
if(length(vertex.set) < 5){
print('No Community Found')
return(NULL)
}

if(length(vertex.set) == n){
print('No Community Found')
return(NULL)
}
changes = vertex.change(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n)

#Get candidates
outside.candidate <- which.max(changes[setdiff(1:n, vertex.set)])[1]
outside.candidate <- which.max(changes[setdiff(1:n, vertex.set)])[1]
u.add <- setdiff(1:n, vertex.set)[outside.candidate]

inside.candidate <- which.max(changes[vertex.set])
u.sub <- vertex.set[inside.candidate]

#Make the swap
results <- swap.candidate(vertex.set, changes, u.add, u.sub, score.old)
return(list(B.new = results$set.new, score.old = results$score.old))
return(list(B.new = results$set.new, score.old = results$score.old))
}
#######################################################################
#Inner function for a single swap inside the function for Multilayer.Extraction
#Note: check the names of initial set
single.swap = function(initial.set, adjacency, mod.matrix, m, n){

#initialize vertex.set and layer.set
B.new <- initial.set$vertex.set
I.new <- initial.set$layer.set
score.old <- score(mod.matrix, vertex.set = B.new, layer.set = I.new, n)

iterations <- 1
B.fixed <- B.new + 1
I.fixed <- I.new + 1

#main loop
while(length(intersect(B.fixed, B.new)) < max(length(B.fixed), length(B.new)) |
while(length(intersect(B.fixed, B.new)) < max(length(B.fixed), length(B.new)) |
length(intersect(I.fixed, I.new)) < max(length(I.fixed), length(I.new))){

if(length(B.new) < 2 | length(I.new) < 1){
print('No community found')
return(NULL)
}

#seems redundant, check...
B.fixed <- B.new
I.fixed <- I.new

B <- B.new
I <- I.new + 1

#update layer set
if(m > 1){
while(length(intersect(I.new, I)) < max(length(I.new), length(I))){

I <- I.new
results <- swap.layer(adjacency, mod.matrix, I, B, score.old, m, n)
I.new <- results$layer.set.new
Expand All @@ -276,15 +276,15 @@ single.swap = function(initial.set, adjacency, mod.matrix, m, n){
if(m == 1){
I.new <- 1
}

#update vertex set
B <- B - 1
B.new <- B + 1

while(length(intersect(B.new, B)) < max(length(B.new), length(B))){
B <- B.new
results <- swap.vertex(adjacency, mod.matrix, I.new, B, score.old, m, n)

B.new <- results$B.new
score.old <- results$score.old
}
Expand All @@ -294,17 +294,17 @@ single.swap = function(initial.set, adjacency, mod.matrix, m, n){

######Effect on score when adding or subtracting a layer#######
layer.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){

indx <- setdiff(1:m, layer.set) #which layers are not in the current set
score.changes <- rep(0, m)

for(i in 1:m){
if(i %in% indx){
score.changes[i] <- score(mod.matrix, vertex.set =
score.changes[i] <- score(mod.matrix, vertex.set =
vertex.set, layer.set = union(layer.set, i), n) - score.old
}
if(i %in% indx == FALSE){
score.changes[i] <- score(mod.matrix, vertex.set =
score.changes[i] <- score(mod.matrix, vertex.set =
vertex.set, layer.set = setdiff(layer.set, i), n) - score.old
}
}
Expand All @@ -313,21 +313,21 @@ layer.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old,

######Effect on score when adding or subtracting a vertex#######
vertex.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){

indx <- setdiff(1:n, vertex.set)
score.changes <- rep(0, n)

#the following can also be parallelized!
for(i in 1:n){
if(i %in% indx){
score.changes[i] <- score(mod.matrix, vertex.set =
score.changes[i] <- score(mod.matrix, vertex.set =
union(vertex.set, i), layer.set = layer.set, n) - score.old
}
if(i %in% indx == FALSE){
score.changes[i] <- score(mod.matrix, vertex.set =
score.changes[i] <- score(mod.matrix, vertex.set =
setdiff(vertex.set, i), layer.set = layer.set, n) - score.old
}
}

return(score.changes)
}
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(MultilayerExtraction)

test_check("MultilayerExtraction")
7 changes: 7 additions & 0 deletions tests/testthat/test-multilayer_extraction.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
context("test-multilayer_extraction.R")

test_that("multiplication works", {
data("AU_CS")
network <- adjacency.to.edgelist(AU_CS)
community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10)
})