Skip to content

Commit

Permalink
Major tidy up
Browse files Browse the repository at this point in the history
  • Loading branch information
Addimator committed Aug 15, 2024
1 parent fb51bec commit b8392d4
Show file tree
Hide file tree
Showing 31 changed files with 317 additions and 583 deletions.
23 changes: 7 additions & 16 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,35 +1,26 @@

# https://github.com/varlociraptor/varlociraptor-methylation-evaluation

resources:
ref:
species: "homo_sapiens"
datatype: "dna"
build: "GRCh38"
release: "110"
release: "112"
chromosome: "21"
ontology:
# gene ontology to download, used e.g. in goatools
gene_ontology: "http://current.geneontology.org/ontology/go-basic.obo"


#
scatter_items: 100

# base_dir: "/home/adrian//Documents/Promotion/"
# varlo_path: "/home/adrian//Documents/Promotion/varlociraptor/"
# pipeline_path: "/home/adrian/Documents/Promotion/data_analysis/snakemake-workflow-template"
# sample_path: "/home/adrian/Documents/Promotion/data_analysis/snakemake-workflow-template/resources/sample.tsv"


base_dir: "/projects/koesterlab/benchmark-methylation/data_analysis_jochen"
varlo_path: "/projects/koesterlab/benchmark-methylation/data_analysis_jochen/varlociraptor/"
pipeline_path: "/projects/koesterlab/benchmark-methylation/data_analysis_jochen/compare_meth/"
sample_path: "/projects/koesterlab/benchmark-methylation/data_analysis_jochen/compare_meth/resources/sample.tsv"
# sample_path: "/home/adrian/Documents/Promotion/data_analysis_jochen/compare_meth/resources/sample.tsv"
# Path of the sample file
sample_path: "/projects/koesterlab/benchmark-methylation/data_analysis_jochen/compare_meth/config/sample.tsv"

# Name of base experiment
ref_sample: "BC01-ref-sorted"
ref_sample_number: "BC01"

# Not used right now
enrichment:
goatools:
# tool is only run if set to `true`
Expand All @@ -53,7 +44,7 @@ enrichment:
# Only required if you want to have a gene analysis for your pathways. Else NA
orgDb: org.Hs.eg.db


# Not used right now
meta_comparisons:
# comparison is only run if set to `true`
activate: true
Expand Down
5 changes: 5 additions & 0 deletions config/sample.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name path sequencer
BC01-ref-sorted /home/adrian/Documents/Promotion/data_analysis/snakemake-workflow-template/resources/alignments/BC01-ref-sorted.bam Nanopore
BC02-ref-sorted /home/adrian/Documents/Promotion/data_analysis/snakemake-workflow-template/resources/alignments/BC02-ref-sorted.bam Nanopore
BC03-ref-sorted /home/adrian/Documents/Promotion/data_analysis/snakemake-workflow-template/resources/alignments/BC03-ref-sorted.bam Nanopore
BC04-ref-sorted /home/adrian/Documents/Promotion/data_analysis/snakemake-workflow-template/resources/alignments/BC04-ref-sorted.bam Nanopore
Empty file removed output.sam
Empty file.
2 changes: 0 additions & 2 deletions temp.sam

This file was deleted.

19 changes: 1 addition & 18 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,11 @@ configfile: "config/config.yaml"
include: "rules/common.smk"
include: "rules/dmr_heatmap.smk"
include: "rules/dmr_calling.smk"
include: "rules/get_data.smk"
include: "rules/prepare_meth_calling.smk"
include: "rules/methylation_calling.smk"
include: "rules/annotate_dmrs.smk"


def read_sample_tsv(sample_tsv_path):
    """Load the sample sheet into a dict keyed by sample name.

    The file is read as tab-separated text: the first line is treated as a
    header and skipped, every following non-blank line must hold exactly
    three columns (name, path, sequencer).

    Args:
        sample_tsv_path: Path of the tab-separated sample sheet.

    Returns:
        dict mapping each sample name to a ``(path, sequencer)`` tuple.
        An empty or header-only file yields an empty dict.

    Raises:
        ValueError: If a non-blank data line does not consist of exactly
            three tab-separated columns.
    """
    samples = {}
    with open(sample_tsv_path, "r") as file:
        # Skip the header line; the default keeps an empty file from
        # raising StopIteration.
        next(file, None)
        for line_number, line in enumerate(file, start=2):
            # Blank lines (typically a trailing newline at the end of the
            # sheet) carry no sample and must not break the unpacking below.
            if not line.strip():
                continue
            # Split before trimming so that an empty last column is kept as
            # an empty field instead of silently shifting the column count.
            fields = [field.strip() for field in line.rstrip("\r\n").split("\t")]
            if len(fields) != 3:
                raise ValueError(
                    f"{sample_tsv_path}, line {line_number}: expected 3 "
                    f"tab-separated columns (name, path, sequencer), "
                    f"got {len(fields)}"
                )
            name, path, sequencer = fields
            samples[name] = (path, sequencer)
    return samples


sample_tsv_path = config["sample_path"]
samples = read_sample_tsv(sample_tsv_path)


chromosome_conf = config["resources"]["ref"]


# declare https://github.com/Addimator/enrichment as a module
module enrichment:
snakefile:
Expand Down
4 changes: 2 additions & 2 deletions workflow/envs/chipseeker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ channels:
dependencies:
- bioconductor-genomicranges=1.54.1
- bioconductor-chipseeker=1.38.0
- bioconductor-genomicfeatures=1.54.1
- r-biocmanager=1.30.23
- r-tidyverse=2.0.0
- bioconductor-genomicfeatures=1.54.1
- r-tidyverse=2.0.0
5 changes: 0 additions & 5 deletions workflow/envs/genomation.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions workflow/envs/plot.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
channels:
- conda-forge
dependencies:
- altair=5.1.1
- vl-convert-python=0.13.1
- pysam=0.21.0
- vegafusion=1.6.7
- vega_datasets=0.9.0
- matplotlib=3.8.4
- pandas=2.2.2
- scikit-learn=1.5.1
- seaborn=0.13.2
- numpy
- fastcluster=1.2.6
File renamed without changes.
1 change: 1 addition & 0 deletions workflow/report/annotations.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Annotation of DMRs with genetic and regulatory elements.
1 change: 1 addition & 0 deletions workflow/report/heatmap.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Comparison of the DMRs between each experiment and the base experiment. The x-axis shows the experiments, the y-axis shows the gene regions (found by `ChIPseeker <https://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html>`_). If an experiment has several DMRs within a gene region, the maximum is selected; if an experiment has no DMRs in a gene region, the value is set to 0.
1 change: 1 addition & 0 deletions workflow/report/metilene_plots.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Basic DMR statistics from `Metilene <http://legacy.bioinf.uni-leipzig.de/Software/metilene/Manual/#filter_output_file_and_plot_basic_dmr_statistics>`_.
3 changes: 2 additions & 1 deletion workflow/report/workflow.rst
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Differentially methylated region analysis!
Pairwise differentially methylated region (DMR) calling between a base experiment and any number of other experiments. The DMRs are called with `Metilene <http://legacy.bioinf.uni-leipzig.de/Software/metilene/>`_ and subsequently annotated with genetic and regulatory elements.
For visualisation, we use the standard plots of Metilene as well as a heatmap comparing the DMRs between the experiments.
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@ datasets:
genes_transcripts:
path: ?input.genes_transcripts
separator: "\t"
regulations:
path: ?input.regulations
regulatory_elements:
path: ?input.regulatory_elements
separator: "\t"
default-view: genes_transcripts
views:
genes_transcripts:
dataset: genes_transcripts
desc: |
Differentially methylated region between the two experiments under consideration annotated with genes/transcripts.
page-size: 25
desc: ?f"Differentially methylated regions (DMRs) between the experiments {params.base_experiment} and {wildcards.group2}. The DMRs were annotated with genomic elements using chipseeker (https://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html ).\n
The table is structured as follows; \n
The chr, start_dmr, and end_dmr columns give the exact position of the DMR. The mean_methylation_difference column shows the difference in methylation between the two experiments. The annotation and transcriptID columns show genetic elements, while the absolute_signed_pi_val is the absolute value of the signed version of the pi-value score of the mean methylation difference (as proposed by Xiao et al. 2014 https://pubmed.ncbi.nlm.nih.gov/22321699/ ). \n
The table is sorted in descending order by the absolute values of the signed versions of the pi-value score. Thus, values with small q-value but large methylation difference (and therefore values of importance) appear first."
render-table:
columns:
chr:
Expand Down Expand Up @@ -54,19 +55,24 @@ views:
heatmap:
scale: ordinal
color-scheme: tableau20
# source:
# plot:
# heatmap:
# scale: ordinal
# color-scheme: tableau20
absolute_signed_pi_val:
plot:
heatmap:
scale: linear
range:
- "white"
- "#6baed6"
transcriptId:
link-to-url:
Ensembl:
url: "http://www.ensembl.org/Homo_sapiens/Gene/Summary?g={transcript_id}"
regulations:
dataset: regulations
desc: |
Differentially methylated region between the two experiments under consideration annotated with genes/transcripts.
url: "http://www.ensembl.org/Homo_sapiens/Gene/Summary?g={transcriptId}"
regulatory_elements:
dataset: regulatory_elements
desc: ?f"Differentially methylated regions (DMRs) between the experiments {params.base_experiment} and {wildcards.group2}. The DMRs were annotated with genomic elements using chipseeker (https://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html ).\n
The table is structured as follows; \n
The chr, start_dmr, and end_dmr columns give the exact position of the DMR. The mean_methylation_difference column shows the difference in methylation between the two experiments. The type, gene_name and gene_biotype columns show regulatory elements, while the absolute_signed_pi_val is the absolute value of the signed version of the pi-value score of the mean methylation difference (as proposed by Xiao et al. 2014 https://pubmed.ncbi.nlm.nih.gov/22321699/ ). \n
The table is sorted in descending order by the absolute values of the signed versions of the pi-value score. Thus, values with small q-value but large methylation difference (and therefore values of importance) appear first."

page-size: 25
render-table:
columns:
Expand Down Expand Up @@ -120,7 +126,7 @@ views:
type:
link-to-url:
Ensembl:
url: "https://www.ensembl.org/Homo_sapiens/Regulation/Summary?fdb=funcgen;r=7:50781435-50784400;rf={ID}"
# Ensembl serves regulatory feature pages under /Regulation/Summary. Only the
# dataset/view keys were renamed to regulatory_elements; the URL path must stay.
# NOTE(review): the region r=7:50781435-50784400 is hard-coded for every row;
# confirm that rf={ID} alone selects the feature.
url: "https://www.ensembl.org/Homo_sapiens/Regulation/Summary?fdb=funcgen;r=7:50781435-50784400;rf={ID}"
source:
plot:
heatmap:
Expand Down Expand Up @@ -152,4 +158,9 @@ views:
scale: ordinal
color-scheme: tableau20
absolute_signed_pi_val:
display-mode: hidden
plot:
heatmap:
scale: linear
range:
- "white"
- "#6baed6"
Loading

0 comments on commit b8392d4

Please sign in to comment.