Skip to content

Commit

Permalink
Merge pull request #2 from mofiarska/master
Browse files Browse the repository at this point in the history
  • Loading branch information
sienkie authored May 20, 2018
2 parents eb3f50a + 6e5fd12 commit 6cdbebc
Show file tree
Hide file tree
Showing 44 changed files with 5,382 additions and 0 deletions.
139 changes: 139 additions & 0 deletions pypathway/hotnet/HotNet2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/env python

# Load required modules
import sys, os, json
from itertools import product

# Load HotNet2 modules
from hotnet2 import run as hnrun, hnap, hnio, heat as hnheat, consensus_with_stats, viz as hnviz
from hotnet2.constants import ITERATION_REPLACEMENT_TOKEN, HN2_INFMAT_NAME

sys.path.append(os.path.normpath(os.path.dirname(os.path.realpath(__file__)) + '/scripts/'))
import createDendrogram as CD

def get_parser():
    """Build the command-line parser for consensus-mode HotNet2 runs.

    Returns:
        The configured ``hnap.HotNetArgParser``. Arguments may also be read
        from a file by prefixing its path with ``@``.
    """
    # NOTE: adjacent string literals are concatenated verbatim, so every
    # continued help string needs an explicit trailing space.
    description = ("Helper script for simple runs of generalized HotNet2, including automated "
                   "parameter selection.")
    parser = hnap.HotNetArgParser(description=description, fromfile_prefix_chars='@')

    parser.add_argument('-nf', '--network_files', required=True, nargs='*',
                        help='Path to HDF5 (.h5) file containing influence matrix and edge list.')
    parser.add_argument('-pnp', '--permuted_network_paths', required=True, default='',
                        help='Path to influence matrices for permuted networks, one path '
                             'per network file. Include ' + ITERATION_REPLACEMENT_TOKEN + ' '
                             'in the path to be replaced with the iteration number', nargs='*')
    parser.add_argument('-hf', '--heat_files', required=True, nargs='*',
                        help='Path to heat file containing gene names and scores. This can either '
                             'be a JSON file created by generateHeat.py, in which case the file '
                             'name must end in .json, or a tab-separated file containing a gene '
                             'name in the first column and the heat score for that gene in the '
                             'second column of each line.')
    parser.add_argument('-ccs', '--min_cc_size', type=int, default=2,
                        help='Minimum size connected components that should be returned.')
    parser.add_argument('-d', '--deltas', nargs='*', type=float, default=[],
                        help='Delta value(s).')
    parser.add_argument('-np', '--network_permutations', type=int, default=100,
                        help='Number of permutations to be used for delta parameter selection.')
    parser.add_argument('-cp', '--consensus_permutations', type=int, default=0,
                        help='Number of permutations to be used for consensus statistical significance testing.')
    parser.add_argument('-hp', '--heat_permutations', type=int, default=100,
                        help='Number of permutations to be used for statistical significance testing.')
    parser.add_argument('-o', '--output_directory', required=True, default=None,
                        help='Output directory. Files results.json, components.txt, and '
                             'significance.txt will be generated in subdirectories for each delta.')
    parser.add_argument('-c', '--num_cores', type=int, default=1,
                        help='Number of cores to use for running permutation tests in parallel. If '
                             '-1, all available cores will be used.')
    parser.add_argument('-dsf', '--display_score_file',
                        help='Path to a tab-separated file containing a gene name in the first '
                             'column and the display score for that gene in the second column of '
                             'each line.')
    parser.add_argument('-dnf', '--display_name_file',
                        help='Path to a tab-separated file containing a gene name in the first '
                             'column and the display name for that gene in the second column of '
                             'each line.')
    parser.add_argument('--output_hierarchy', default=False, required=False, action='store_true',
                        help='Output the hierarchical decomposition of the HotNet2 similarity matrix.')
    # choices is range(5), i.e. 0..4, so the documented maximum must be 4.
    parser.add_argument('--verbose', default=1, choices=list(range(5)), type=int, required=False,
                        help='Set verbosity of output (minimum: 0, maximum: 4).')

    return parser

def run(args):
    """Run HotNet2 in consensus mode and write results plus visualization data.

    Loads every network and heat file named in ``args``, calls
    ``consensus_with_stats`` on them, writes each single run under
    ``<output_directory>/<network>-<heat>/``, writes the consensus under
    ``<output_directory>``, and finally dumps a ``viz-data.json`` for every
    single run and for the consensus.

    Args:
        args: Parsed arguments as produced by ``get_parser()``.

    Raises:
        ValueError: If the number of network files differs from the number of
            permuted network paths.
        NotImplementedError: If ``--output_hierarchy`` was requested (see the
            FIXME below).
    """
    # Validate with an explicit exception: an ``assert`` disappears under -O.
    if len(args.network_files) != len(args.permuted_network_paths):
        raise ValueError(
            'Expected one permuted network path per network file, got %d '
            'network file(s) and %d permuted network path(s).'
            % (len(args.network_files), len(args.permuted_network_paths)))

    # FIXME: the hierarchy output used to call
    #   CD.createDendrogram(sim, list(index2gene.values()), hierarchy_out_dir, params, verbose=False)
    # for every single run, but neither ``sim`` nor ``index2gene`` is defined
    # anywhere in this script, so the option always died with a NameError
    # *after* the whole consensus computation. Fail fast with a clear message
    # until the per-run similarity matrix and its index-to-gene mapping are
    # made available here.
    if args.output_hierarchy:
        raise NotImplementedError(
            '--output_hierarchy is not supported by this script: the similarity '
            'matrix needed to build the dendrogram is not computed here.')

    # Load the network and heat files
    networks, graph_map = [], dict()
    for network_file, pnp in zip(args.network_files, args.permuted_network_paths):
        infmat, indexToGene, G, network_name = hnio.load_network(network_file, HN2_INFMAT_NAME)
        graph_map[network_name] = G
        networks.append((infmat, indexToGene, G, network_name, pnp))

    heats, json_heat_map, heat_map, mutation_map, heat_file_map = [], dict(), dict(), dict(), dict()
    for heat_file in args.heat_files:
        # A heat file is treated as JSON purely based on its extension.
        json_heat = os.path.splitext(heat_file.lower())[1] == '.json'
        heat, heat_name, mutations = hnio.load_heat_file(heat_file, json_heat)
        json_heat_map[heat_name] = json_heat
        heat_map[heat_name] = heat
        heat_file_map[heat_name] = heat_file
        mutation_map[heat_name] = mutations
        heats.append((heat, heat_name))

    # Run HotNet2 on each pair of network and heat files
    if args.verbose > 0:
        print('* Running HotNet2 in consensus mode...')

    single_runs, consensus, linkers, auto_deltas, consensus_stats = consensus_with_stats(args, networks, heats)

    # Output the single runs
    if args.verbose > 0:
        print('* Outputting results to file...')

    params = vars(args)
    result_dirs = []
    # ``single_run`` (not ``run``) so the loop does not shadow this function.
    for (network_name, heat_name, single_run) in single_runs:
        # Set up the output directory and record for later
        output_dir = '%s/%s-%s' % (args.output_directory, network_name.lower(), heat_name.lower())
        result_dirs.append(output_dir)
        hnio.setup_output_dir(output_dir)

        # Output to file
        hnio.output_hotnet2_run(single_run, params, network_name, heat_map[heat_name], heat_name,
                                heat_file_map[heat_name], json_heat_map[heat_name], output_dir)

    # Output the consensus
    hnio.output_consensus(consensus, linkers, auto_deltas, consensus_stats, params, args.output_directory)

    # Create the visualization(s). This has to be after the consensus procedure
    # is run because we want to default to the auto-selected deltas.
    if args.verbose > 0:
        print('* Generating and outputting visualization data...')

    d_score = hnio.load_display_score_tsv(args.display_score_file) if args.display_score_file else None
    d_name = hnio.load_display_name_tsv(args.display_name_file) if args.display_name_file else dict()
    for (network_name, heat_name, single_run), result_dir, auto_delta in zip(single_runs, result_dirs, auto_deltas):
        snvs, cnas, sampleToType = mutation_map[heat_name]
        G = graph_map[network_name]

        output = hnviz.generate_viz_json(single_run, G.edges(), network_name, heat_map[heat_name],
                                         snvs, cnas, sampleToType, d_score, d_name)

        with open('{}/viz-data.json'.format(result_dir), 'w') as OUT:
            output['params'] = dict(consensus=False, network_name=network_name, heat_name=heat_name,
                                    auto_delta=format(auto_delta, 'g'))
            json.dump(output, OUT)

    # Add the consensus visualization.
    # NOTE(review): this deliberately keeps the historical behaviour of reusing
    # whatever the loops above left behind: ``heat``/``mutations`` come from the
    # LAST heat file loaded, and ``G``/``network_name`` from the LAST single run
    # visualized. Confirm that this is the intended input for the consensus view.
    snvs, cnas, sampleToType = mutations
    consensus_ccs = [d['core'] + d['expansion'] for d in consensus]
    consensus_auto_delta = 0
    results = [[consensus_ccs, consensus_stats, consensus_auto_delta]]
    with open('{}/consensus/viz-data.json'.format(args.output_directory), 'w') as OUT:
        output = hnviz.generate_viz_json(results, G.edges(), network_name, heat, snvs, cnas,
                                         sampleToType, d_score, d_name)
        output['params'] = dict(consensus=True, auto_delta=format(consensus_auto_delta, 'g'))
        json.dump(output, OUT)

if __name__ == "__main__":
    # Parse the command line (everything after the script name) and hand the
    # resulting namespace to the consensus pipeline.
    cli_args = get_parser().parse_args(sys.argv[1:])
    run(cli_args)
Empty file added pypathway/hotnet/__init__.py
Empty file.
14 changes: 14 additions & 0 deletions pypathway/hotnet/hotnet2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from .constants import *
from . import delta
from . import heat
from . import hnap
from . import hnio
from . import hotnet2
from . import permutations
from . import run
from . import stats
from . import union_find
from . import viz
from . import hierarchy
from .consensus import *

122 changes: 122 additions & 0 deletions pypathway/hotnet/hotnet2/c_ext_src/basic.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//
// basic.c
// color_coding
//
// Created by Yang Xu on 2017/8/16.
// Copyright © 2017年 sheep. All rights reserved.
//

#include "basic.h"
#include "data_structure.h"
#include <string.h>
#include <stdlib.h>

/*
 * Build a directed graph from the square adjacency matrix described by `md`.
 *
 * The matrix is read row-major: a non-zero byte at row i, column j adds j to
 * the neighbour list of node i. Node ids equal their row index.
 *
 * Returns a newly allocated graph, or NULL if an allocation fails (in which
 * case everything allocated so far is released). Nodes without any outgoing
 * edge get degree 0 and a NULL `neighbours` pointer.
 */
struct Graph* DiGraphFromMatrix(struct MatrixDes* md) {
    struct Graph* G = (struct Graph*)malloc(sizeof(struct Graph));
    if (G == NULL) {
        return NULL;
    }
    G->node_count = md->length;
    G->nodes = (struct Node*)malloc(sizeof(struct Node) * md->length);
    /* malloc(0) may legitimately return NULL, so only treat NULL as a
       failure when there is at least one node to store. */
    if (G->nodes == NULL && md->length > 0) {
        free(G);
        return NULL;
    }
    char * mt = md->matrix;
    for (int i = 0; i < md->length; i++) {
        G->nodes[i].degree = 0;
        G->nodes[i].id = i;
        /* Start from NULL so realloc() below behaves like malloc() on the
           first neighbour and the pointer is never left uninitialised. */
        G->nodes[i].neighbours = NULL;
        for (int j = 0; j < md->length; j++) {
            /* Index in size_t: length * i overflows int for large matrices. */
            if (!mt[(size_t)md->length * (size_t)i + (size_t)j]) {
                continue;
            }
            int* grown = (int*)realloc(G->nodes[i].neighbours,
                                       ((size_t)G->nodes[i].degree + 1) * sizeof(int));
            if (grown == NULL) {
                /* realloc left the old buffer intact; release every list
                   built so far (rows 0..i) and the graph itself. */
                for (int k = 0; k <= i; k++) {
                    free(G->nodes[k].neighbours);
                }
                free(G->nodes);
                free(G);
                return NULL;
            }
            grown[G->nodes[i].degree] = j;
            G->nodes[i].neighbours = grown;
            G->nodes[i].degree++;
        }
    }
    return G;
}


/*
 * Compute the strongly connected components of `G` without recursion.
 *
 * Returns a singly linked list of SubQueue entries, one per component, each
 * holding a queue of the node indices in that component. If no component is
 * found the returned head has `queue == NULL`. Returns NULL if the working
 * memory cannot be allocated.
 *
 * Nodes with no outgoing edge are never used as a starting point, so such a
 * node is only reported (as a singleton) when it is reached from another node.
 *
 * NOTE(review): the structure (preorder numbers, lowlink, a stack of pending
 * component members) looks like the iterative Tarjan/Nuutila scheme - confirm
 * before relying on properties of that algorithm.
 */
struct SubQueue* stronglyConnectedComponent(struct Graph* G) {
    // store the result
    struct SubQueue* result = (struct SubQueue*)malloc(sizeof(struct SubQueue));
    if (result == NULL) {
        return NULL;
    }
    struct SubQueue* current = result;
    result->next = NULL;
    result->queue = NULL;

    // zero-initialised per-node bookkeeping; 0 in preorder means "not visited"
    size_t n = G->node_count > 0 ? (size_t)G->node_count : 0;
    int * preorder = (int *)calloc(n, sizeof(int));
    int * lowlink = (int *)calloc(n, sizeof(int));
    int * scc_found = (int *)calloc(n, sizeof(int));
    // calloc(0, ...) may return NULL, so only n > 0 indicates a real failure
    if (n > 0 && (preorder == NULL || lowlink == NULL || scc_found == NULL)) {
        free(preorder);
        free(lowlink);
        free(scc_found);
        free(result);
        return NULL;
    }

    // nodes that are finished but whose component root is not yet known
    struct QueueNode* scc_queue = NULL;
    // running preorder counter
    int i = 0;
    for (int j = 0; j < G->node_count; j++) {
        if (G->nodes[j].degree == 0) {
            // this is an empty node
            continue;
        }
        if (scc_found[G->nodes[j].id]) {
            // already done
            continue;
        }
        // `queue` is used as an explicit stack: peek/pop always at the right
        struct QueueNode* queue = initQueue(j);
        while (queue) {
            int v = queuePeakRight(queue);
            if (preorder[v] == 0) {
                i += 1;
                preorder[v] = i;
            }
            int done = 1;
            // go through neighbours: descend into the first unvisited one
            for (int k = 0; k < G->nodes[v].degree; k++) {
                int nei_id = G->nodes[v].neighbours[k];
                if (preorder[nei_id] == 0) {
                    queueAppend(&queue, nei_id);
                    done = 0;
                    break;
                }
            }
            if (done == 1) {
                // every neighbour has been visited: settle lowlink[v]
                lowlink[v] = preorder[v];
                for (int k = 0; k < G->nodes[v].degree; k++) {
                    int nei_id = G->nodes[v].neighbours[k];
                    if (scc_found[nei_id] == 0) {
                        if (preorder[nei_id] > preorder[v]) {
                            lowlink[v] = lowlink[v] < lowlink[nei_id] ? lowlink[v] : lowlink[nei_id];
                        }else{
                            lowlink[v] = lowlink[v] < preorder[nei_id] ? lowlink[v] : preorder[nei_id];
                        }
                    }
                }
                queuePopRight(&queue);
                if (lowlink[v] == preorder[v]) {
                    // v closes a component: collect v plus every pending node
                    // that was numbered after v
                    scc_found[v] = 1;
                    struct QueueNode* resultQueue = initQueue(v);
                    while (scc_queue && preorder[queuePeakRight(scc_queue)] > preorder[v]) {
                        int k = queuePopRight(&scc_queue);
                        scc_found[k] = 1;
                        queueAppend(&resultQueue, k);
                    }
                    if (result->queue == NULL) {
                        // first component goes into the pre-allocated head
                        result->queue = resultQueue;
                    }else{
                        struct SubQueue* next = (struct SubQueue*)malloc(sizeof(struct SubQueue));
                        next->queue = resultQueue;
                        next->next = NULL;
                        current->next = next;
                        current=next;
                    }
                }else{
                    queueAppend(&scc_queue, v);
                }
            }
        }
    }

    // the bookkeeping arrays are private to this call; release them
    free(preorder);
    free(lowlink);
    free(scc_found);
    return result;
}
19 changes: 19 additions & 0 deletions pypathway/hotnet/hotnet2/c_ext_src/basic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//
// basic.h
// color_coding
//
// Created by Yang Xu on 2017/8/16.
// Copyright © 2017年 sheep. All rights reserved.
//

#ifndef basic_h
#define basic_h

#include <stdio.h>
#include "data_structure.h"

struct Graph* DiGraphFromMatrix(struct MatrixDes* md);

struct SubQueue* stronglyConnectedComponent(struct Graph* G);

#endif /* basic_h */
85 changes: 85 additions & 0 deletions pypathway/hotnet/hotnet2/c_ext_src/data_structure.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//
// data_structure.c
// color_coding
//
// Created by sheep on 2017/8/2.
// Copyright © 2017年 sheep. All rights reserved.
//

#include "data_structure.h"
#include <stdlib.h>

/*
 * Append `val` at the right end of `*queue`.
 * An empty queue (*queue == NULL) is replaced by a fresh one-element queue.
 */
void queueAppend(struct QueueNode** queue, int val){
    struct QueueNode* head = *queue;
    if (head == NULL) {
        // nothing stored yet: the new node becomes the whole queue
        *queue = initQueue(val);
        return;
    }

    // initQueue yields a node with value = val, next = NULL and end = itself,
    // exactly the fields a new tail needs; only the back link is missing
    struct QueueNode* fresh = initQueue(val);
    struct QueueNode* old_tail = head->end;
    fresh->perious = old_tail;

    // hook the node in behind the old tail and record it as the new end
    old_tail->next = fresh;
    head->end = fresh;
}

/*
 * Remove the leftmost element of `*queue` and return its value.
 *
 * Mirrors queuePopRight: returns -1 when the queue is empty, frees the
 * removed node, and sets *queue to NULL once the last element is gone.
 */
int queuePopLeft(struct QueueNode** queue){
    struct QueueNode* left = *queue;
    if (left == NULL) {
        // empty queue: same sentinel as queuePopRight
        return -1;
    }

    int left_val = left->value;
    struct QueueNode* new_left = left->next;
    if (new_left != NULL) {
        // the head node carries the end pointer, so hand it over
        new_left->end = left->end;
        // the new head has no predecessor; do not leave a dangling back link
        new_left->perious = NULL;
    }
    *queue = new_left;

    // the detached node is no longer reachable through the queue
    free(left);
    return left_val;
}

/*
 * Remove the rightmost element of `*queue` and return its value.
 * Returns -1 for an empty queue; *queue becomes NULL when the last
 * element is removed. The removed node is freed.
 */
int queuePopRight(struct QueueNode** queue){
    struct QueueNode* head = *queue;
    if (head == NULL) {
        return -1;
    }

    if (head->next == NULL) {
        // single element: the head itself is the node being removed
        int only = head->value;
        free(head);
        *queue = NULL;
        return only;
    }

    // several elements: unlink the tail and make its predecessor the new end
    struct QueueNode* tail = head->end;
    struct QueueNode* before_tail = tail->perious;
    head->end = before_tail;
    before_tail->next = NULL;

    int popped = tail->value;
    free(tail);
    return popped;
}

int queuePeakLeft(struct QueueNode* queue){
return queue->value;
}

int queuePeakRight(struct QueueNode* queue){
return queue->end->value;
}

/*
 * Allocate a one-element queue holding `val`.
 * The single node has no neighbours and is its own end.
 */
struct QueueNode* initQueue(int val){
    struct QueueNode* head = (struct QueueNode*)malloc(sizeof(struct QueueNode));
    head->value = val;
    // no neighbours on either side yet
    head->perious = NULL;
    head->next = NULL;
    // the only node is also the rightmost one
    head->end = head;
    return head;
}

// The structure representation of the graph node




Loading

0 comments on commit 6cdbebc

Please sign in to comment.