Skip to content

Commit

Permalink
added find_isoform
Browse files Browse the repository at this point in the history
  • Loading branch information
OliverVoogd committed Jul 5, 2023
1 parent 0a94839 commit e3213e1
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 0 deletions.
79 changes: 79 additions & 0 deletions src/main-functions/find_isoform.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include "find_isoform.h"

#include <string>
#include <unordered_map>
#include <vector>
#include <utility>

#include "../classes/GFFData.h"
#include "../classes/GeneAnnotationParser.h"
#include "../classes/junctions.h"
#include "../classes/Pos.h"
#include "get_transcript_seq.h"
#include "group_bam2isoform.h"

/**
 * @brief Drive the isoform-finding steps: parse the reference annotation,
 *        derive the per-gene lookup structures, run group_bam2isoform, then
 *        re-parse its GFF3 output and hand it to get_transcript_seq.
 *
 * @param gff3               Reference annotation, handed to parse_gff_file.
 * @param genome_bam         Forwarded to group_bam2isoform.
 * @param isoform_gff3       Forwarded to group_bam2isoform, then handed to
 *                           parse_gff_file once that call returns.
 * @param tss_tes_stat       Forwarded to group_bam2isoform.
 * @param genomefa           Forwarded to group_bam2isoform and get_transcript_seq.
 * @param transcript_fa      Forwarded to get_transcript_seq.
 * @param isoform_parameters Forwarded unchanged to group_bam2isoform.
 * @param raw_splice_isoform Forwarded to group_bam2isoform.
 *
 * NOTE(review): the string arguments look like file paths, and isoform_gff3 /
 * transcript_fa look like outputs of the two stages -- confirm against the
 * helper implementations.
 */
void find_isoform(
    const std::string &gff3,
    const std::string &genome_bam,
    const std::string &isoform_gff3,
    const std::string &tss_tes_stat,
    const std::string &genomefa,
    const std::string &transcript_fa,
    const Rcpp::List &isoform_parameters,
    const std::string &raw_splice_isoform)
{
    Rcpp::Rcout << "#### Reading Gene Annotations\n";

    GFFData reference_annotation = parse_gff_file(gff3);

    // Junctions are computed from transcript_to_exon *before* gene_to_transcript
    // is replaced by remove_similar_tr below; keep this ordering.
    std::unordered_map<std::string, Junctions> junctions_by_transcript =
        map_transcripts_to_junctions(reference_annotation.transcript_to_exon);

    // The literal 10 is passed straight through; its meaning is defined by
    // remove_similar_tr (presumably a similarity tolerance -- TODO confirm).
    reference_annotation.gene_to_transcript = remove_similar_tr(
        reference_annotation.gene_to_transcript,
        reference_annotation.transcript_to_exon,
        10);

    // Both structures below are built from the already-filtered gene_to_transcript.
    std::unordered_map<std::string, std::vector<exon>> exons_by_gene =
        get_gene_flat(
            reference_annotation.gene_to_transcript,
            reference_annotation.transcript_to_exon);

    std::unordered_map<std::string, std::vector<GeneBlocks>> blocks_by_chr =
        get_gene_blocks(
            exons_by_gene,
            reference_annotation.chr_to_gene,
            reference_annotation.gene_to_transcript);

    // Stage 1: group_bam2isoform
    group_bam2isoform(
        genome_bam,
        isoform_gff3,
        tss_tes_stat,
        exons_by_gene,
        junctions_by_transcript,
        reference_annotation.transcript_dict,
        blocks_by_chr,
        genomefa,
        isoform_parameters,
        raw_splice_isoform);

    // isoform_gff3 is parsed only after group_bam2isoform has returned.
    GFFData discovered_annotation = parse_gff_file(isoform_gff3);

    // Stage 2: get_transcript_seq. This is the last step; nothing computed in
    // this function is read after the call.
    get_transcript_seq(
        genomefa,
        transcript_fa,
        discovered_annotation,
        reference_annotation);
}
22 changes: 22 additions & 0 deletions src/main-functions/find_isoform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef FIND_ISOFORM_H
#define FIND_ISOFORM_H

#include <Rcpp.h>

#include <string>

#include "../classes/Pos.h"

/**
 * @brief Entry point for the isoform-finding step.
 *
 * The definition parses the annotation named by @p gff3, builds the per-gene
 * lookup structures, calls group_bam2isoform, re-parses @p isoform_gff3 and
 * finally calls get_transcript_seq.
 *
 * @param gff3               Reference annotation, handed to parse_gff_file.
 * @param genome_bam         Forwarded to group_bam2isoform.
 * @param isoform_gff3       Forwarded to group_bam2isoform, then parsed with
 *                           parse_gff_file after that call returns.
 * @param tss_tes_stat       Forwarded to group_bam2isoform.
 * @param genomefa           Forwarded to group_bam2isoform and get_transcript_seq.
 * @param transcript_fa      Forwarded to get_transcript_seq.
 * @param isoform_parameters Forwarded unchanged to group_bam2isoform.
 * @param raw_splice_isoform Forwarded to group_bam2isoform.
 *
 * NOTE(review): the string arguments are presumably file paths (inputs for
 * gff3 / genome_bam / genomefa, outputs for the rest) -- confirm against the
 * helper implementations.
 */
void
find_isoform
(
const std::string &gff3,
const std::string &genome_bam,
const std::string &isoform_gff3,
const std::string &tss_tes_stat,
const std::string &genomefa,
const std::string &transcript_fa,
const Rcpp::List &isoform_parameters,
const std::string &raw_splice_isoform);

#endif // FIND_ISOFORM_H

0 comments on commit e3213e1

Please sign in to comment.