Skip to content

Commit

Permalink
Merge pull request #44 from maxplanck-ie/develop
Browse files Browse the repository at this point in the history
v1.3.2
  • Loading branch information
WardDeb authored Feb 13, 2024
2 parents 01acf94 + dc5211c commit 476cb05
Show file tree
Hide file tree
Showing 12 changed files with 268 additions and 22 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CI workflow: on every push, create the conda environment described in
# requirements.yaml, install the package with pip, and run the two shell
# scripts under ultraheatmap/test/test_data as smoke tests.
name: build

on:
  push:

defaults:
  run:
    # Login shell for every `run` step; presumably required so that
    # `micromamba activate` is available in the steps below — confirm.
    shell: bash -l {0}

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # NOTE(review): `@main` tracks a moving branch; consider pinning to a
      # released tag so CI runs stay reproducible.
      - uses: mamba-org/setup-micromamba@main
        with:
          environment-file: requirements.yaml
          cache-downloads: true
          environment-name: ultraheatmap
      # Install the package from the repository root into the environment.
      - name: build
        run: |
          micromamba activate ultraheatmap
          pip install .
      # Smoke test for the addFeatureToMatrix command-line tool.
      - name: addFeatureToMatrix
        run: |
          micromamba activate ultraheatmap
          bash ultraheatmap/test/test_data/addFeatureToMatrix/addFeatureToMatrix.sh
      # Smoke test for the computeOrderedMatrix command-line tool.
      - name: computeOrderedMatrix
        run: |
          micromamba activate ultraheatmap
          bash ultraheatmap/test/test_data/computeOrderedMatrix/computeOrderedMatrix.sh
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Then activate the environment:

To install the program in this environment:

$ python setup.py install
$ pip install .


from the ultraheatmap directory.
Expand All @@ -48,7 +48,7 @@ Alternatively, `pip` or `conda` can be used to install the package. We highly
recommend you to create a new conda environment prior to the installation and
install it after activating this environment. This can be done as follows:

$ conda create -n ultraheatmap python=3.6
$ conda create -n ultraheatmap python=3.10

$ conda activate ultraheatmap

Expand Down
4 changes: 2 additions & 2 deletions meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "ultraheatmap" %}
{% set version = "1.3.1" %}
{% set version = "1.3.2" %}

package:
name: "{{ name|lower }}"
Expand All @@ -21,7 +21,7 @@ requirements:
run:
- python >=3.10
- bedtools >2
- deeptools >3.5.3
- deeptools >=3.5.3
- gffutils
- pybedtools
- pybigwig
Expand Down
15 changes: 7 additions & 8 deletions requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ channels:
- conda-forge
- bioconda
dependencies:
- python >= 3.6
- bedtools=2.27.1
- deeptools >= 3.3.1
- gffutils=0.9
- pybedtools=0.7.10
- pybigwig=0.3.10
- pyyaml >= 5.1
- pysam >= 0.16.0.1
- python >= 3.10
- bedtools
- deeptools >= 3.5.3
- gffutils
- pybedtools
- pybigwig
- pyyaml
176 changes: 176 additions & 0 deletions script/multi_heatmap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#!/usr/bin/env python
# Generate hybrid matrices and plot them with plotHeatmap.

import os
import subprocess as sp
import glob
import sys
import argparse
import yaml

def parse_args():
    """
    Build the command-line parser for this script.

    Note that, despite the name, the un-parsed ``ArgumentParser`` is
    returned; the caller is expected to invoke ``.parse_args()`` on it.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # Two titled groups so that the help output separates mandatory options
    # from optional ones.
    mandatory_group = cli.add_argument_group('required arguments')
    extra_group = cli.add_argument_group('optional arguments')

    # required arguments:
    mandatory_group.add_argument(
        "--config", "-c",
        dest="config",
        required=True,
        help="config to use to plot heatmap (yaml file)",
    )

    # optional arguments:
    extra_group.add_argument(
        "--heatmapOnly", "-ho",
        dest="heatmapOnly",
        action='store_true',
        default=False,
        help="only plot heatmap for given matrices in yaml file",
    )

    return cli


def _is_set(value):
    """Return True when a config value carries an actual setting.

    ``None``, ``False`` and empty or whitespace-only strings count as unset.
    Numeric zero counts as set, so that e.g. ``zmin: 0`` is honoured.
    """
    if value is None or value is False:
        return False
    return str(value).strip() != ""


def _run_command(cmd):
    """Echo *cmd* and run it; raises CalledProcessError on a non-zero exit."""
    print(cmd)
    # NOTE: the command string is assembled from the YAML config and run
    # through the shell, so only use this script with configs you trust.
    sp.check_output(cmd, shell=True)


def _compute_matrix_cmd(config, bws, names, matrix_name):
    """Build the computeMatrix command (scale-regions or reference-point)."""
    if config["scale_region"]:
        cmd = "computeMatrix scale-regions"
        cmd += " -bs " + str(config["binsize"])
        cmd += " --regionBodyLength " + str(config["regionBodyLength"])
    else:
        # NOTE(review): the reference-point call has never passed the
        # configured binsize; kept that way — confirm whether -bs is wanted.
        cmd = "computeMatrix reference-point --referencePoint "
        cmd += str(config["refpoint"])
    cmd += " -S " + bws
    cmd += " -R " + str(config["regions_to_plot"])
    cmd += " -p 20"
    cmd += " -a " + str(config["after_region"])
    cmd += " -b " + str(config["before_region"])
    cmd += " --missingDataAsZero --skipZeros"
    cmd += " --samplesLabel " + names
    cmd += " -o " + matrix_name
    if _is_set(config.get("averageTypeBins")):
        cmd += " --averageTypeBins " + str(config["averageTypeBins"])
    return cmd


def _add_feature_cmd(config, matrix_name, ultraheatmap_matrix_name, deg_list):
    """Build the addFeatureToMatrix command that appends the feature tables."""
    cmd = "addFeatureToMatrix -m " + matrix_name
    cmd += " -o " + ultraheatmap_matrix_name
    cmd += " -t " + " ".join(deg_list)
    if config["w_mapping"]:
        # Mapping mode: feature names, TSS reference point and the GTF are
        # always passed.
        cmd += " --featureNames " + str(config["featureNames"])
        cmd += " --referencePoint TSS"
        cmd += " --genomeGtf " + str(config["gtf"])
    else:
        if _is_set(config.get("featureNames")):
            cmd += " --featureNames " + str(config["featureNames"])
        if _is_set(config.get("featureIdColumn")):
            cmd += " --featureIdColumn " + str(config["featureIdColumn"])
    return cmd


def _plot_heatmap_cmd(config, matrix_to_plot, heatmap_name, sample_labels,
                      output_path):
    """Build the plotHeatmap command from the plotting options in *config*."""
    cmd = "plotHeatmap -m " + matrix_to_plot
    for key, flag in (("zmin", "--zMin"),
                      ("zmax", "--zMax"),
                      ("colors", "--colorMap")):
        if _is_set(config.get(key)):
            cmd += " " + flag + " " + str(config[key])
    cmd += " -o " + heatmap_name
    cmd += " --samplesLabel " + sample_labels
    cmd += " --whatToShow 'heatmap and colorbar' "
    for key, flag in (("sort_by_sample", "--sortUsingSamples"),
                      ("sortUsing", "--sortUsing"),
                      ("sorting_direction", "--sortRegions")):
        if _is_set(config.get(key)):
            cmd += " " + flag + " " + str(config[key])
    # Optional output files are written below the configured output path.
    for key, flag in (("sorted_regions", "--outFileSortedRegions"),
                      ("sorted_matrix", "--outFileNameMatrix")):
        if _is_set(config.get(key)):
            cmd += " " + flag + " " + os.path.join(output_path, config[key])
    # NOTE(review): the source's indentation was lost, so the pairing of the
    # original `else` is ambiguous. Assumed here: --clusterUsingSamples only
    # applies together with --kmeans, and region labels are only passed when
    # k-means clustering is off — confirm against the intended behaviour.
    if _is_set(config.get("kmeans")):
        cmd += " --kmeans " + str(config["kmeans"])
        if _is_set(config.get("clusterUsingSamples")):
            cmd += " --clusterUsingSamples " + str(config["clusterUsingSamples"])
    elif _is_set(config.get("reg_labels")):
        cmd += " --regionsLabel " + str(config["reg_labels"])
    return cmd


def main():
    """
    compute_matrix > ultraheatmap (optional) > heatmap

    Reads the YAML file given with --config, then:
      1. runs computeMatrix on the configured bigWig files and regions,
      2. if ``ultraheatmap_matrix_name`` is set, runs addFeatureToMatrix to
         append the tables listed in ``deg_list``,
      3. runs plotHeatmap on the resulting matrix.
    Steps 1 and 2 are skipped when --heatmapOnly is given.

    Raises subprocess.CalledProcessError if any external command fails and
    KeyError if a mandatory config key is missing.
    """
    parser = parse_args()
    args = parser.parse_args()
    with open(args.config, 'r') as stream:
        config = yaml.safe_load(stream)

    # transcriptomics data (optional key; empty means "relative to cwd")
    path_2_rna = config.get("path_2_rna") or ""

    # output params
    output_path = config["output_path"]
    matrix_name = os.path.join(output_path, config["matrix_name"])
    heatmap_name = os.path.join(output_path, config["heatmap_name"])

    # bigWig files and their labels, in config order
    bws = " ".join(str(path) for path in config["bws"].values())
    names = " ".join(str(label) for label in config["bws"])

    # feature tables to append; only used when an ultraheatmap matrix is wanted
    use_features = _is_set(config.get("ultraheatmap_matrix_name"))
    deg_list = []
    deg_names = []
    ultraheatmap_matrix_name = None
    if use_features:
        for i, deg in enumerate(config["deg_list"]):
            deg_list.append(os.path.join(path_2_rna, deg))
            deg_names.append(str(config["deg_names"][i]))
        ultraheatmap_matrix_name = os.path.join(
            output_path, config["ultraheatmap_matrix_name"])

    if not args.heatmapOnly:
        _run_command(_compute_matrix_cmd(config, bws, names, matrix_name))
        if use_features:
            _run_command(_add_feature_cmd(config, matrix_name,
                                          ultraheatmap_matrix_name, deg_list))

    # plot heatmap
    sample_labels = names
    matrix_to_plot = matrix_name
    if use_features:
        sample_labels = " ".join([names] + deg_names)
        matrix_to_plot = ultraheatmap_matrix_name
    _run_command(_plot_heatmap_cmd(config, matrix_to_plot, heatmap_name,
                                   sample_labels, output_path))
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Example configuration for script/multi_heatmap.py.
# Empty strings ("") mean "option not set".

# bigWig files to compute the matrix on; keys are used as sample labels
bws:
  bw1: "/path/to/bw1.bw"
  bw2: "/path/to/bw2.bw"

# directory containing the tables listed in deg_list
path_2_rna: "/path/to/rna"
regions_to_plot: "/path/region.bed"
# output params
output_path: "/path/to/output"
matrix_name: "output.gz"
# True: computeMatrix scale-regions; False: computeMatrix reference-point
scale_region: True
before_region: 2000
after_region: 0
regionBodyLength: 3500
binsize: 10
refpoint: ""  # used only when scale_region is False
deg_list: ["rn1.tsv", "rna2.tsv"]
deg_names: ["rn1", "rna2"]
# leave empty to skip addFeatureToMatrix and plot the plain matrix
ultraheatmap_matrix_name: "appended_output.gz"
w_mapping: False
gtf: ""  # used only when w_mapping is True
featureIdColumn: 'GeneID'
featureNames: 'log2FoldChange'
zmin: ""  # lowest intensity to plot
zmax: ""  # highest intensity to plot
colors: ""  # matplotlib colormap name(s), passed to plotHeatmap --colorMap
averageTypeBins: ""
sortUsing: ""
sort_by_sample: ""
sorting_direction: " descend "
reg_labels: " "
heatmap_name: "plotname.png"
sorted_regions: ""
sorted_matrix: ""
remove_na: False
kmeans: ""
clusterUsingSamples: ""
2 changes: 1 addition & 1 deletion ultraheatmap/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.3.1'
__version__ = '1.3.2'
1 change: 1 addition & 0 deletions ultraheatmap/addFeatureToMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def main():
args.referencePoint,
args.closestGenesOutput) # XXX instead of all these arguments i can simply add args.
# paste an extra column per table to the input matrix
print("closest gene is found")
extract_ge_folchange_per_peak(regions, args.tables, closestMapping,
args.Features, args.idcolumn, hm)

Expand Down
2 changes: 1 addition & 1 deletion ultraheatmap/computeOrderedMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def main():
# 1. Read the default config file
with open(os.path.join(configDir, "configs", "computeOrderedMatrix.yaml"),
'r') as stream:
defaultconfigfile = yaml.load(stream)
defaultconfigfile = yaml.safe_load(stream)

# 2. Parse the arguments
parser = parse_args(defaultconfigfile)
Expand Down
12 changes: 7 additions & 5 deletions ultraheatmap/parseTables.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,18 @@ def extract_ge_folchange_per_peak(peaks, tables, closestMapping, features,
Peaks = BedTool(peaks)
Peaks=Peaks.sort()
field_count = Peaks.field_count()
keyMap_closest = keymap_from_closest_genes(closestMapping, peaks, field_count)
keyMap_closest = keymap_from_closest_genes(closestMapping, peaks, field_count) ##TODO very inefficient!!
print("start updating the matrix")
__update_matrix_values(peaks, keyMap_closest, tables,features,IdColumn,hm)

def __getValuesFromGETable(peaks, keyMap_closest, table, features, IdColumn):
"""
"""
print(table[IdColumn].values)
count = 0
v = np.empty((len(peaks), len(features)), dtype=float)
for i, peak in enumerate(peaks):
for i, peak in enumerate(peaks): ##TODO get rid of the for loop
key = ';'.join(map(str,peak))
value = keyMap_closest[key]
if value in table[IdColumn].values: #value is geneId
Expand All @@ -106,7 +108,8 @@ def __getValuesFromGETable(peaks, keyMap_closest, table, features, IdColumn):
v[i,j] = x
else:
v[i] = [ np.nan ]*len(features)



return v


Expand All @@ -120,7 +123,7 @@ def __getValuesFromNameTable(peaks, table, features, IdColumn):
name = peak[3]
if name in table[IdColumn].values:
for j, feature in enumerate(features):
x = float(table[table[IdColumn] == name][feature])
x = float(table[table[IdColumn] == name][feature].iloc[0])
if np.isnan(x):
x = np.nan
v[i,j] = x
Expand All @@ -138,7 +141,6 @@ def __update_matrix_values(peaks, keyMap_closest, tables, features, IdColumn, hm
for i, table in enumerate(tables):
table = parseTable(table)
values = __getValuesFromGETable(peaks, keyMap_closest, table, features, IdColumn)

valuesTab[:,i*len(features):(i*len(features)+len(features))] = values
for feature in features:
hm.matrix.sample_labels = hm.matrix.sample_labels + ["table"+str(i)+"_"+feature]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/usr/bin/env bash
computeMatrix reference-point -S ../scores.bw -R ../regions.bed -o matrix.gz
addFeatureToMatrix -m matrix.gz -o appended_matrix.gz -t features.bed -f score1 score2 --featureIdColumn name
computeMatrix reference-point -S ultraheatmap/test/test_data/scores.bw -R ultraheatmap/test/test_data/regions.bed -o matrix.gz
addFeatureToMatrix -m matrix.gz -o appended_matrix.gz -t ultraheatmap/test/test_data/addFeatureToMatrix/features.bed -f score1 score2 --featureIdColumn name
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/usr/bin/env bash
computeOrderedMatrix -S ../scores.bw ../scores.bw -R ../regions.bed -o matrix_ordered.gz -g 1 --kmean 2
computeOrderedMatrix -S ultraheatmap/test/test_data/scores.bw ultraheatmap/test/test_data/scores.bw -R ultraheatmap/test/test_data/regions.bed -o matrix_ordered.gz -g 1 --kmean 2

0 comments on commit 476cb05

Please sign in to comment.