Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows pipeline support for BLRunner and BLEvaluator #110

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions Algorithms/ARBORETO/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@

# FROM continuumio/anaconda3:2018.12

FROM continuumio/anaconda3:2024.02-1

LABEL Maintainer="Aditya Pratapa <adyprat@vt.edu>"
Expand All @@ -9,8 +6,6 @@ USER root

RUN apt-get update

# RUN conda install -y -c bioconda/label/cf201901 arboreto=0.1.5 pandas=0.24.0

RUN conda install -y -c bioconda arboreto pandas

COPY runArboreto.py /
Expand Down
1 change: 0 additions & 1 deletion Algorithms/PIDC/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# FROM julia:1.1.0-stretch
FROM julia:1.6.0-buster

LABEL maintainer="Aditya Pratapa <adyprat@vt.edu>"
Expand Down
20 changes: 10 additions & 10 deletions Algorithms/SCSGL/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
#This is the base image upon which necessary libraries are installed
FROM python:3.8
FROM python:3.8

#Additional information
LABEL Maintainer="Yiqi Su <yiqisu@vt.edu>"
LABEL Maintainer="Yiqi Su <yiqisu@vt.edu>"

#Set main user as root to avoid permission issues
USER root
USER root

#Sets current working directory
WORKDIR /
WORKDIR /

#Copy the main Python script which will perform necessary GRN computations
COPY run_scSGL.py /
COPY run_scSGL.py /

#Copy the original scSGL repo files stored in SCSGL folder
COPY scSGL /scSGL
COPY scSGL /scSGL

#Install time command to compute time taken
RUN apt-get update && apt-get install -y r-base time
RUN apt-get update && apt-get install -y r-base time

#Install the requirements and install R into the conda environment
RUN pip install -r /scSGL/requirements.txt
RUN pip install -r /scSGL/requirements.txt

#Install pcaPP to use zero inflated Kendall tau as a kernel
RUN Rscript -e "install.packages('pcaPP')"
RUN Rscript -e "install.packages('pcaPP')"

#Make a directory to mount the folder containing input files
RUN mkdir data/
RUN mkdir data/
6 changes: 5 additions & 1 deletion BLEval/computeDGAUC.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pandas as pd
import numpy as np
import seaborn as sns
Expand Down Expand Up @@ -45,7 +46,7 @@ def PRROC(dataDict, inputSettings, directed = True, selfEdges = False, plotFlag
AUROC = {}

# set-up outDir that stores output directory name
outDir = "outputs/"+str(inputSettings.datadir).split("inputs/")[1]+ '/' +dataDict['name']
outDir = "outputs/"+"/".join(str(inputSettings.datadir).split("inputs" + os.sep)[1].split(os.sep))+ '/' +dataDict['name']

if directed:
for algo in tqdm(inputSettings.algorithms,
Expand Down Expand Up @@ -214,6 +215,9 @@ def computeScores(trueEdgesDF, predEdgeDF,
# to pass it to sklearn
outDF = pd.DataFrame([TrueEdgeDict,PredEdgeDict]).T
outDF.columns = ['TrueEdges','PredEdges']
utils = importr('utils')
utils.chooseCRANmirror(ind=1)
# utils.install_packages('PRROC')
prroc = importr('PRROC')
prCurve = prroc.pr_curve(scores_class0 = FloatVector(list(outDF['PredEdges'].values)),
weights_class0 = FloatVector(list(outDF['TrueEdges'].values)))
Expand Down
4 changes: 3 additions & 1 deletion BLEval/computeNetMotifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from itertools import product, permutations, combinations, combinations_with_replacement
from tqdm import tqdm
import networkx as nx
import os

def Motifs(datasetDict, inputSettings):
'''
Expand Down Expand Up @@ -68,7 +69,8 @@ def Motifs(datasetDict, inputSettings):
refMI = 1

# set-up outDir that stores output directory name
outDir = "outputs/"+str(inputSettings.datadir).split("inputs/")[1]+ '/' + datasetDict['name']
outDir = "outputs/"+str(inputSettings.datadir).split("inputs" + os.sep)[1] + '/' + datasetDict['name']
print(f"\n\nThe output directory is: {outDir}\n\n")
dataDict = {}
# dataDict['Conn. Comp'] = {}
dataDict['FFL'] = {}
Expand Down
3 changes: 2 additions & 1 deletion BLEval/computePathStats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import os
sns.set(rc={"lines.linewidth": 2}, palette = "deep", style = "ticks")
from sklearn.metrics import precision_recall_curve, roc_curve, auc
from itertools import product, permutations, combinations, combinations_with_replacement
Expand Down Expand Up @@ -40,7 +41,7 @@ def pathAnalysis(dataDict, inputSettings):


# set-up outDir that stores output directory name
outDir = "outputs/"+str(inputSettings.datadir).split("inputs/")[1]+ '/' +dataDict['name']
outDir = "outputs/"+str(inputSettings.datadir).split("inputs" + os.sep)[1]+ '/' +dataDict['name']
#print(dataDict['name'])

##################################################
Expand Down
4 changes: 4 additions & 0 deletions BLEval/parseTime.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,8 @@ def parse_time_files(path):
print("Algorithm running failed, setting time value to -1\n")
time_val = -1

except IndexError:
print(f"Time output {path} file found but not populated, setting time value to -1\n")
time_val = -1

return time_val
8 changes: 4 additions & 4 deletions BLPlotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def boxplot(opts, evalConfigs, datasets, randValue, resTypeFile, resTypeName):
evalConfig = evalConfigs[i]

# Read output file containing AUROC values
DF = pd.read_csv(str(evalConfig.output_settings.base_dir) + '/' \
+ str(evalConfig.input_settings.datadir).split("inputs")[1] + '/' \
DF = pd.read_csv(str(evalConfig.output_settings.base_dir) + "/" \
+ "/".join(str(evalConfig.input_settings.datadir).split("inputs" + os.sep)[1].split(os.sep)) + '/' \
+ str(evalConfig.output_settings.output_prefix) \
+ '-' + resTypeFile + '.csv', header = 0, index_col = 0)

Expand Down Expand Up @@ -465,8 +465,8 @@ def main():
for j, dataset in enumerate(datasets):
evalConfig = evalConfigs[j]

ResDF = pd.read_csv(str(evalConfig.output_settings.base_dir) + '/' \
+ str(evalConfig.input_settings.datadir).split("inputs")[1] + '/' \
ResDF = pd.read_csv(str(evalConfig.output_settings.base_dir) \
+ "/".join(str(evalConfig.input_settings.datadir).split("inputs")[1].split(os.sep)) + '/' \
+ str(evalConfig.output_settings.output_prefix) \
+ '-' + resTypeFileName[i] + '.csv', header = 0, index_col = 0)

Expand Down
7 changes: 4 additions & 3 deletions BLRun/genie3Runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import pandas as pd
from pathlib import Path
from BLRun.out_path_generator import get_output_path
import numpy as np

def generateInputs(RunnerObj):
Expand Down Expand Up @@ -30,10 +31,10 @@ def run(RunnerObj):

:param RunnerObj: An instance of the :class:`BLRun`
'''
inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1] + \
inputPath = "data" + "/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep)) + \
"/GENIE3/ExpressionData.csv"
# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GENIE3/"
outDir = get_output_path(RunnerObj, "/GENIE3/")
os.makedirs(outDir, exist_ok = True)

outPath = "data/" + str(outDir) + 'outFile.txt'
Expand All @@ -53,7 +54,7 @@ def parseOutput(RunnerObj):
:param RunnerObj: An instance of the :class:`BLRun`
'''
# Quit if output directory does not exist
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GENIE3/"
outDir = get_output_path(RunnerObj, "/GENIE3/")


# Read output
Expand Down
7 changes: 4 additions & 3 deletions BLRun/grisliRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
from pathlib import Path
import numpy as np
from BLRun.out_path_generator import get_output_path

def generateInputs(RunnerObj):
'''
Expand Down Expand Up @@ -47,15 +48,15 @@ def run(RunnerObj):
alphaMin = str(RunnerObj.params['alphaMin'])

# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GRISLI/"
outDir = get_output_path(RunnerObj, "/GRISLI/")
os.makedirs(outDir, exist_ok = True)

PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
header = 0, index_col = 0)

colNames = PTData.columns
for idx in range(len(colNames)):
inputPath = "data"+str(RunnerObj.inputDir).split(str(Path.cwd()))[1]+"/GRISLI/"+str(idx)+"/"
inputPath = "data"+"/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep))+"/GRISLI/"+str(idx)+"/"
os.makedirs(outDir+str(idx), exist_ok = True)

outFile = "data/" + str(outDir) +str(idx)+"/outFile.txt"
Expand All @@ -72,7 +73,7 @@ def parseOutput(RunnerObj):
Function to parse outputs from GRISLI.
'''

outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GRISLI/"
outDir = get_output_path(RunnerObj, "/GRISLI/")

PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
header = 0, index_col = 0)
Expand Down
7 changes: 4 additions & 3 deletions BLRun/grnboost2Runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import pandas as pd
from pathlib import Path
from BLRun.out_path_generator import get_output_path
import numpy as np

def generateInputs(RunnerObj):
Expand All @@ -25,10 +26,10 @@ def run(RunnerObj):
'''
Function to run GRNBOOST2 algorithm
'''
inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1] + \
inputPath = "data" + "/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep)) + \
"/GRNBOOST2/ExpressionData.csv"
# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GRNBOOST2/"
outDir = get_output_path(RunnerObj, "/GRNBOOST2/")
os.makedirs(outDir, exist_ok = True)


Expand All @@ -46,7 +47,7 @@ def parseOutput(RunnerObj):
Function to parse outputs from GRNBOOST2.
'''
# Quit if output directory does not exist
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GRNBOOST2/"
outDir = get_output_path(RunnerObj, "/GRNBOOST2/")

if not Path(outDir+'outFile.txt').exists():
print(outDir+'outFile.txt'+'does not exist, skipping...')
Expand Down
7 changes: 4 additions & 3 deletions BLRun/grnvbemRunner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import pandas as pd
from pathlib import Path
from BLRun.out_path_generator import get_output_path
import numpy as np

def generateInputs(RunnerObj):
Expand Down Expand Up @@ -46,9 +47,9 @@ def run(RunnerObj):
Function to run GRN-VBEM algorithm
'''

inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1]
inputPath = "data" + "/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep))
# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GRNVBEM/"
outDir = get_output_path(RunnerObj, "/GRNVBEM/")
os.makedirs(outDir, exist_ok = True)

PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
Expand All @@ -72,7 +73,7 @@ def parseOutput(RunnerObj):
'''
Function to parse outputs from GRNVBEM.
'''
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/GRNVBEM/"
outDir = get_output_path(RunnerObj, "/GRNVBEM/")

PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
header = 0, index_col = 0)
Expand Down
7 changes: 4 additions & 3 deletions BLRun/jump3Runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
from pathlib import Path
import numpy as np
from BLRun.out_path_generator import get_output_path
from sklearn import preprocessing

def generateInputs(RunnerObj):
Expand Down Expand Up @@ -42,11 +43,11 @@ def run(RunnerObj):
'''
Function to run GRN-VBEM algorithm
'''
inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1] + \
inputPath = "data" + "/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep)) + \
"/JUMP3/ExpressionData.csv"

# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/JUMP3/"
outDir = get_output_path(RunnerObj, "/JUMP3/")
os.makedirs(outDir, exist_ok = True)

outPath = "data/" + str(outDir) + 'outFile.txt'
Expand All @@ -62,7 +63,7 @@ def parseOutput(RunnerObj):
Function to parse outputs from JUMP3.
'''
# Quit if output directory does not exist
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/JUMP3/"
outDir = get_output_path(RunnerObj, "/JUMP3/")
if not Path(outDir+'outFile.txt').exists():
print(outDir+'outFile.txt'+'does not exist, skipping...')
return
Expand Down
7 changes: 4 additions & 3 deletions BLRun/leapRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
from pathlib import Path
import numpy as np
from BLRun.out_path_generator import get_output_path

def generateInputs(RunnerObj):
'''
Expand Down Expand Up @@ -44,12 +45,12 @@ def run(RunnerObj):
Requires the maxLag parameter
'''

inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1]
inputPath = "data" + "/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep))

maxLag = str(RunnerObj.params['maxLag'])

# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/LEAP/"
outDir = get_output_path(RunnerObj, "/LEAP/")
os.makedirs(outDir, exist_ok = True)

PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
Expand All @@ -74,7 +75,7 @@ def parseOutput(RunnerObj):
'''
Function to parse outputs from LEAP.
'''
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/LEAP/"
outDir = get_output_path(RunnerObj, "/LEAP/")

PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
header = 0, index_col = 0)
Expand Down
5 changes: 5 additions & 0 deletions BLRun/out_path_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os


def get_output_path(runner_obj, algo_name):
    """Build the algorithm's output directory path from a runner's input dir.

    Takes everything after the ``inputs`` directory in ``runner_obj.inputDir``,
    re-joins it with forward slashes (so Windows ``\\`` separators are
    normalized), and nests it under ``outputs/`` followed by *algo_name*
    (expected to be of the form ``"/ALGO/"``).
    """
    input_dir = str(runner_obj.inputDir)
    # Portion of the path that follows "inputs<sep>"; raises IndexError if
    # the marker is absent, matching the project's existing convention.
    relative = input_dir.split("inputs" + os.sep)[1]
    # Normalize any remaining OS-specific separators to "/".
    normalized = "/".join(relative.split(os.sep))
    return "outputs/" + normalized + algo_name
7 changes: 4 additions & 3 deletions BLRun/pidcRunner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import pandas as pd
from pathlib import Path
from BLRun.out_path_generator import get_output_path
import numpy as np

def generateInputs(RunnerObj):
Expand All @@ -23,11 +24,11 @@ def run(RunnerObj):
'''
Function to run PIDC algorithm
'''
inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1] + \
inputPath = "data" + "/".join(str(RunnerObj.inputDir).split(str(Path.cwd()))[1].split(os.sep)) + \
"/PIDC/ExpressionData.csv"

# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/PIDC/"
outDir = get_output_path(RunnerObj, "/PIDC/")
os.makedirs(outDir, exist_ok = True)

outPath = 'data/'+ str(outDir) + 'outFile.txt'
Expand All @@ -43,7 +44,7 @@ def parseOutput(RunnerObj):
Function to parse outputs from SCODE.
'''
# Quit if output directory does not exist
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/PIDC/"
outDir = get_output_path(RunnerObj, "/PIDC/")
if not Path(outDir+'outFile.txt').exists():
print(outDir+'outFile.txt'+'does not exist, skipping...')
return
Expand Down
Loading