diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..de0a953 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,32 @@ +name: build + +on: + push: + +defaults: + run: + shell: bash -l {0} + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: mamba-org/setup-micromamba@main + with: + environment-file: requirements.yaml + cache-downloads: true + environment-name: ultraheatmap + - name: build + run: | + micromamba activate ultraheatmap + pip install . + - name: addFeatureToMatrix + run: | + micromamba activate ultraheatmap + bash ultraheatmap/test/test_data/addFeatureToMatrix/addFeatureToMatrix.sh + - name: computeOrderedMatrix + run: | + micromamba activate ultraheatmap + bash ultraheatmap/test/test_data/computeOrderedMatrix/computeOrderedMatrix.sh + diff --git a/README.md b/README.md index ecc1eba..8cde20c 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Then activate the environment: To install the program in this environment: - $ python setup.py install + $ pip install . from the ultraheatmap directory. @@ -48,7 +48,7 @@ Alternatively, `pip` or `conda` can be used to install the package. We highly recommend you to create a new conda environment prior to the installation and install it after activating this environment. This can be done as follows: - $ conda create -n ultraheatmap python=3.6 + $ conda create -n ultraheatmap python=3.10 $ conda activate ultraheatmap diff --git a/meta.yaml b/meta.yaml index c4d8e5c..b64fd93 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,5 +1,5 @@ {% set name = "ultraheatmap" %} -{% set version = "1.3.1" %} +{% set version = "1.3.2" %} package: name: "{{ name|lower }}" @@ -21,7 +21,7 @@ requirements: run: - python >=3.10 - bedtools >2 - - deeptools >3.5.3 + - deeptools >=3.5.3 - gffutils - pybedtools - pybigwig diff --git a/requirements.yaml b/requirements.yaml index a209566..0e95ebf 100755 --- a/requirements.yaml +++ b/requirements.yaml @@ -3,11 +3,10 @@ channels: - conda-forge - bioconda dependencies: - - python >= 3.6 - - bedtools=2.27.1 - - deeptools >= 3.3.1 - - gffutils=0.9 - - pybedtools=0.7.10 - - pybigwig=0.3.10 - - pyyaml >= 5.1 - - pysam >= 0.16.0.1 + - python >= 3.10 + - bedtools + - deeptools >= 3.5.3 + - gffutils + - pybedtools + - pybigwig + - pyyaml diff --git a/script/multi_heatmap.py b/script/multi_heatmap.py new file mode 100755 index 0000000..710da4f --- /dev/null +++ b/script/multi_heatmap.py @@ -0,0 +1,176 @@ +# generating hybrid matrices and plot them by plotHeatmap + +#!/usr/bin/env python + +import os +import subprocess as sp +import glob +import sys +import argparse +import yaml + +def parse_args(): + """ + parsing arguments + """ + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + required = parser.add_argument_group('required arguments') + optional = parser.add_argument_group('optional arguments') + + # required argumnets: + required.add_argument("--config", + "-c", + dest="config", + help="config to use to plot heatmap (yaml file)", + required=True) + + # optional arguments: + optional.add_argument("--heatmapOnly", + "-ho", + default=False, + action='store_true', + dest="heatmapOnly", + help="only plot heatmap for given matrices in yaml file") + + + + return parser + + +def main(): + """ + compute_matrix > ultraheatmap (optional) > heatmap (or profile only if no ultraheatmap) + """ + parser = parse_args() + args = parser.parse_args() + with open(os.path.join(args.config), 'r') as stream: + config = yaml.safe_load(stream) + + + # transcriptomics data + try: + path_2_rna = config["path_2_rna"] + except: + path_2_rna = "" + + # regions to plot on + regions_to_plot = config["regions_to_plot"] + + # output params + output_path = config["output_path"] + + matrix_name = os.path.join(output_path, config["matrix_name"]) + scale_region = config["scale_region"] + + heatmap_name = os.path.join(output_path, config["heatmap_name"]) + + bws = " " + names = " " + + for k, v in config["bws"].items(): + bws += v+" " + names += k+" " + + deg_list =[] + deg_names = " " + if config["ultraheatmap_matrix_name"] != "": + for i, deg in enumerate( config["deg_list"]): + deg_list.append(os.path.join(config["path_2_rna"], deg)) + # deg_list = [os.path.join(path_2_rna, "nej_vs_wt_shrunk.tsv")] + deg_names += config["deg_names"][i]+" " + ultraheatmap_matrix_name = os.path.join(output_path, config["ultraheatmap_matrix_name"]) + + + if not args.heatmapOnly: + if scale_region: + cmd = "computeMatrix scale-regions " + cmd += " -S "+bws + cmd += " -R "+regions_to_plot + cmd += " -p 20 " + cmd += "-bs "+str(config["binsize"]) + cmd += " --skipZeros --regionBodyLength "+str(config["regionBodyLength"]) + cmd += " -a "+str(config["after_region"]) + cmd += " -b "+str(config["before_region"]) + cmd += " --missingDataAsZero --skipZeros --samplesLabel "+names + cmd += " -o "+ matrix_name + if config['averageTypeBins']: + cmd += ' --averageTypeBins '+config['averageTypeBins'] + print(cmd) + sp.check_output(cmd, shell = True) + else: + cmd = "computeMatrix reference-point --referencePoint "+config["refpoint"] + cmd += " -S "+bws + cmd += " -R "+regions_to_plot + cmd += " -p 20 --skipZeros " + cmd += " -a "+str(config["after_region"]) + cmd += " -b "+str(config["before_region"]) + cmd += " --missingDataAsZero --skipZeros --samplesLabel "+names + cmd += " -o "+ matrix_name + if config['averageTypeBins']: + cmd += ' --averageTypeBins '+config['averageTypeBins'] + sp.check_output(cmd, shell = True) + if config["ultraheatmap_matrix_name"] != "": + w_mapping = config["w_mapping"] + if w_mapping: + cmd = "addFeatureToMatrix " + cmd += "-m "+ matrix_name + cmd += " -o "+ultraheatmap_matrix_name + cmd += " -t "+" ".join(deg_list) + cmd += " --featureNames "+config['featureNames'] + cmd += " --referencePoint TSS " + cmd += " --genomeGtf "+config["gtf"] + sp.check_output(cmd, shell = True) + else: + cmd = "addFeatureToMatrix " + cmd += "-m "+ matrix_name + cmd += " -o "+ultraheatmap_matrix_name + cmd += " -t "+" ".join(deg_list) + if config['featureNames']: + cmd += " --featureNames "+config['featureNames'] + if config['featureIdColumn']: + cmd += " --featureIdColumn "+config['featureIdColumn'] + print(cmd) + sp.check_output(cmd, shell = True) + + + # plot heatmap + all_samples_names = names + matrix_to_plot = matrix_name + if config["ultraheatmap_matrix_name"] != "": + all_samples_names+= deg_names + matrix_to_plot = ultraheatmap_matrix_name + cmd = "plotHeatmap -m " + cmd += matrix_to_plot + if config["zmin"]: + cmd += " --zMin "+config["zmin"] + if config["zmax"]: + cmd+= " --zMax "+config["zmax"] + if config["colors"]: + cmd += " --colorMap "+config["colors"] + cmd += " -o "+heatmap_name + cmd += " --samplesLabel "+all_samples_names + cmd += " --whatToShow 'heatmap and colorbar' " + if sort_by_sample != "": + cmd += " --sortUsingSamples "+config["sort_by_sample"] + if config["sortUsing"]: + cmd += " --sortUsing "+config["sortUsing"] + if config["sorting_direction"] + "--sortRegions "+config["sorting_direction"] + if config["sorted_regions"] != "": + sorted_regions = os.path.join(output_path, config["sorted_regions"]) + cmd += " --outFileSortedRegions "+sorted_regions + if config["sorted_matrix"] != "": + sorted_matrix = os.path.join(output_path, config["sorted_matrix"]) + cmd += " --outFileNameMatrix "+sorted_matrix + if config["kmeans"]: + cmd += " --kmeans "+config["kmeans"] + if config["clusterUsingSamples"]: + cmd += " --clusterUsingSamples "+config["clusterUsingSamples"] + else: + if config["reg_labels"]: + cmd += " --regionsLabel "+config["reg_labels"] + print(cmd) + sp.check_output(cmd, shell = True) +if __name__ == "__main__": + main() diff --git a/script/multi_track_matrix_on_groseq_scaleregion-2000_with_deg_wo_mapping.yaml b/script/multi_track_matrix_on_groseq_scaleregion-2000_with_deg_wo_mapping.yaml new file mode 100755 index 0000000..09d0d99 --- /dev/null +++ b/script/multi_track_matrix_on_groseq_scaleregion-2000_with_deg_wo_mapping.yaml @@ -0,0 +1,36 @@ +bws: + bw1: "/path/to/bw1.bw" + bw2: "/path/to/bw2.bw" + +path_2_rna: "/path/to/rna" +regions_to_plot: "/path/region.bed" +# output params +output_path: "/path/to/output" +matrix_name: "output.gz" +scale_region: True +before_region: 2000 +after_region: 0 +regionBodyLength: 3500 +binsize: 10 +refpoint: "" +deg_list: ["rn1.tsv", "rna2.tsv"] +deg_names: ["rn1", "rna2"] +ultraheatmap_matrix_name: "appended_output.gz" +w_mapping: False +gtf:"" +featureIdColumn: 'GeneID' +featureNames: 'log2FoldChange' +zmin: "" #lowest intensity to plot +zmax: "" #highest intensity to plot +colors: "" #matplot lib colors +averageTypeBins: "" +sortUsing: "" +sort_by_sample: "" +sorting_direction: " descend " +reg_labels: " " +heatmap_name: "plotname.png" +sorted_regions: "" +sorted_matrix: "" +remove_na: False +kmeans: "" +clusterUsingSamples: "" diff --git a/ultraheatmap/__init__.py b/ultraheatmap/__init__.py index 72837bd..e398332 100644 --- a/ultraheatmap/__init__.py +++ b/ultraheatmap/__init__.py @@ -1 +1 @@ -__version__ = '1.3.1' +__version__ = '1.3.2' diff --git a/ultraheatmap/addFeatureToMatrix.py b/ultraheatmap/addFeatureToMatrix.py index c729f01..807ccd2 100755 --- a/ultraheatmap/addFeatureToMatrix.py +++ b/ultraheatmap/addFeatureToMatrix.py @@ -128,6 +128,7 @@ def main(): args.referencePoint, args.closestGenesOutput) # XXX instead of all these arguments i can simply add args. # paste an extra column per table to the input matrix + print("closest gene is found") extract_ge_folchange_per_peak(regions, args.tables, closestMapping, args.Features, args.idcolumn, hm) diff --git a/ultraheatmap/computeOrderedMatrix.py b/ultraheatmap/computeOrderedMatrix.py index f682d19..c3d6e4f 100644 --- a/ultraheatmap/computeOrderedMatrix.py +++ b/ultraheatmap/computeOrderedMatrix.py @@ -196,7 +196,7 @@ def main(): # 1. Read the default config file with open(os.path.join(configDir, "configs", "computeOrderedMatrix.yaml"), 'r') as stream: - defaultconfigfile = yaml.load(stream) + defaultconfigfile = yaml.safe_load(stream) # 2. Parse the arguments parser = parse_args(defaultconfigfile) diff --git a/ultraheatmap/parseTables.py b/ultraheatmap/parseTables.py index ceb7ccf..cbb4406 100755 --- a/ultraheatmap/parseTables.py +++ b/ultraheatmap/parseTables.py @@ -84,16 +84,18 @@ def extract_ge_folchange_per_peak(peaks, tables, closestMapping, features, Peaks = BedTool(peaks) Peaks=Peaks.sort() field_count = Peaks.field_count() - keyMap_closest = keymap_from_closest_genes(closestMapping, peaks, field_count) + keyMap_closest = keymap_from_closest_genes(closestMapping, peaks, field_count) ##TODO very inefficient!! + print("start updating the matrix") __update_matrix_values(peaks, keyMap_closest, tables,features,IdColumn,hm) def __getValuesFromGETable(peaks, keyMap_closest, table, features, IdColumn): """ """ + print(table[IdColumn].values) count = 0 v = np.empty((len(peaks), len(features)), dtype=float) - for i, peak in enumerate(peaks): + for i, peak in enumerate(peaks): ##TODO get rid of the for loop key = ';'.join(map(str,peak)) value = keyMap_closest[key] if value in table[IdColumn].values: #value is geneId @@ -106,7 +108,8 @@ def __getValuesFromGETable(peaks, keyMap_closest, table, features, IdColumn): v[i,j] = x else: v[i] = [ np.nan ]*len(features) - + + return v @@ -120,7 +123,7 @@ def __getValuesFromNameTable(peaks, table, features, IdColumn): name = peak[3] if name in table[IdColumn].values: for j, feature in enumerate(features): - x = float(table[table[IdColumn] == name][feature]) + x = float(table[table[IdColumn] == name][feature].iloc[0]) if np.isnan(x): x = np.nan v[i,j] = x @@ -138,7 +141,6 @@ def __update_matrix_values(peaks, keyMap_closest, tables, features, IdColumn, hm for i, table in enumerate(tables): table = parseTable(table) values = __getValuesFromGETable(peaks, keyMap_closest, table, features, IdColumn) - valuesTab[:,i*len(features):(i*len(features)+len(features))] = values for feature in features: hm.matrix.sample_labels = hm.matrix.sample_labels + ["table"+str(i)+"_"+feature] diff --git a/ultraheatmap/test/test_data/addFeatureToMatrix/addFeatureToMatrix.sh b/ultraheatmap/test/test_data/addFeatureToMatrix/addFeatureToMatrix.sh index a984e14..c241edf 100644 --- a/ultraheatmap/test/test_data/addFeatureToMatrix/addFeatureToMatrix.sh +++ b/ultraheatmap/test/test_data/addFeatureToMatrix/addFeatureToMatrix.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -computeMatrix reference-point -S ../scores.bw -R ../regions.bed -o matrix.gz -addFeatureToMatrix -m matrix.gz -o appended_matrix.gz -t features.bed -f score1 score2 --featureIdColumn name +computeMatrix reference-point -S ultraheatmap/test/test_data/scores.bw -R ultraheatmap/test/test_data/regions.bed -o matrix.gz +addFeatureToMatrix -m matrix.gz -o appended_matrix.gz -t ultraheatmap/test/test_data/addFeatureToMatrix/features.bed -f score1 score2 --featureIdColumn name diff --git a/ultraheatmap/test/test_data/computeOrderedMatrix/computeOrderedMatrix.sh b/ultraheatmap/test/test_data/computeOrderedMatrix/computeOrderedMatrix.sh index a7fdd4c..2ec3f74 100644 --- a/ultraheatmap/test/test_data/computeOrderedMatrix/computeOrderedMatrix.sh +++ b/ultraheatmap/test/test_data/computeOrderedMatrix/computeOrderedMatrix.sh @@ -1,2 +1,2 @@ #!/usr/bin/env bash -computeOrderedMatrix -S ../scores.bw ../scores.bw -R ../regions.bed -o matrix_ordered.gz -g 1 --kmean 2 +computeOrderedMatrix -S ultraheatmap/test/test_data/scores.bw ultraheatmap/test/test_data/scores.bw -R ultraheatmap/test/test_data/regions.bed -o matrix_ordered.gz -g 1 --kmean 2