Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement AlignmentMapper (#14) #15

Merged
merged 19 commits into from
Feb 20, 2023
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@ linked-hash-map = "0.5.6"
nom = "7.1.3"
postgres = { version = "0.19.4", features = ["with-chrono-0_4"] }
pretty_assertions = "1.3.0"
regex = "1.7.1"
serde = { version = "1.0.152", features = ["derive"] }
serde_json = "1.0.93"
39 changes: 20 additions & 19 deletions src/data/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::static_data::Assembly;
/// aliases | AT1,ATA,ATC,ATD,ATE,ATDC,TEL1,TELO1
/// added | 2014-02-04 21:39:32.57125
/// ```
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct GeneInfoRecord {
pub hgnc: String,
pub maploc: String,
Expand Down Expand Up @@ -44,7 +44,7 @@ pub struct GeneInfoRecord {
/// structure means that the transcripts are defined on the same
/// reference sequence and have the same exon spans on that
/// sequence.
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct TxSimilarityRecord {
/// Accession of first transcript.
pub tx_ac1: String,
Expand Down Expand Up @@ -81,7 +81,7 @@ pub struct TxSimilarityRecord {
/// alt_exon_id | 6063334
/// exon_aln_id | 3461425
///```
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct TxExonsRecord {
pub hgnc: String,
pub tx_ac: String,
Expand Down Expand Up @@ -111,7 +111,7 @@ pub struct TxExonsRecord {
/// start_i | 95226307
/// end_i | 95248406
/// ```
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct TxForRegionRecord {
pub tx_ac: String,
pub alt_ac: String,
Expand All @@ -130,7 +130,7 @@ pub struct TxForRegionRecord {
/// lengths | {707,79,410}
/// hgnc | VSX1
/// ```
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct TxIdentityInfo {
pub tx_ac: String,
pub alt_ac: String,
Expand All @@ -149,7 +149,7 @@ pub struct TxIdentityInfo {
/// alt_ac | AC_000143.1
/// alt_aln_method | splign
/// ```
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct TxInfoRecord {
pub hgnc: String,
pub cds_start_i: Option<i32>,
Expand All @@ -169,14 +169,15 @@ pub struct TxInfoRecord {
/// alt_ac | NC_000012.11
/// alt_aln_method | genebuild
/// ```
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Default, Clone)]
pub struct TxMappingOptionsRecord {
pub tx_ac: String,
pub alt_ac: String,
pub alt_aln_method: String,
}

pub trait Interface {
/// Interface for data providers.
pub trait Provider {
/// Return the data version, e.g., `uta_20180821`.
fn data_version(&self) -> &str;

Expand All @@ -198,22 +199,22 @@ pub trait Interface {
/// # Arguments
///
/// * `hgnc` - HGNC gene name
fn get_gene_info(&mut self, hgnc: &str) -> Result<GeneInfoRecord, anyhow::Error>;
fn get_gene_info(&self, hgnc: &str) -> Result<GeneInfoRecord, anyhow::Error>;

/// Return the (single) associated protein accession for a given transcript accession,
/// or None if not found.
///
/// # Arguments
///
/// * `tx_ac` -- transcript accession with version (e.g., 'NM_000051.3')
fn get_pro_ac_for_tx_ac(&mut self, tx_ac: &str) -> Result<Option<String>, anyhow::Error>;
fn get_pro_ac_for_tx_ac(&self, tx_ac: &str) -> Result<Option<String>, anyhow::Error>;

/// Return full sequence for the given accession.
///
/// # Arguments
///
/// * `ac` -- accession
fn get_seq(&mut self, ac: &str) -> Result<String, anyhow::Error>;
fn get_seq(&self, ac: &str) -> Result<String, anyhow::Error>;

/// Return sequence part for the given accession.
///
Expand All @@ -223,7 +224,7 @@ pub trait Interface {
/// * `begin` -- start position (0-based, start of sequence if missing)
/// * `end` -- end position (0-based, end of sequence if missing)
fn get_seq_part(
&mut self,
&self,
ac: &str,
begin: Option<usize>,
end: Option<usize>,
Expand All @@ -236,7 +237,7 @@ pub trait Interface {
///
/// * `tx_ac` -- transcript accession with version (e.g., 'NM_000051.3')
fn get_similar_transcripts(
&mut self,
&self,
tx_ac: &str,
) -> Result<Vec<TxSimilarityRecord>, anyhow::Error>;

Expand All @@ -249,7 +250,7 @@ pub trait Interface {
/// * `alt_ac` -- specific genomic sequence (e.g., NC_000011.4)
/// * `alt_aln_method` -- sequence alignment method (e.g., splign, blat)
fn get_tx_exons(
&mut self,
&self,
tx_ac: &str,
alt_ac: &str,
alt_aln_method: &str,
Expand All @@ -260,7 +261,7 @@ pub trait Interface {
/// # Arguments
///
/// * `gene` - HGNC gene name
fn get_tx_for_gene(&mut self, gene: &str) -> Result<Vec<TxInfoRecord>, anyhow::Error>;
fn get_tx_for_gene(&self, gene: &str) -> Result<Vec<TxInfoRecord>, anyhow::Error>;

/// Return transcripts that overlap given region.
///
Expand All @@ -271,7 +272,7 @@ pub trait Interface {
/// * `start_i` -- 5' bound of region
/// * `end_i` -- 3' bound of region
fn get_tx_for_region(
&mut self,
&self,
alt_ac: &str,
alt_aln_method: &str,
start_i: i32,
Expand All @@ -283,7 +284,7 @@ pub trait Interface {
/// # Arguments
///
/// * `tx_ac` -- transcript accession with version (e.g., 'NM_199425.2')
fn get_tx_identity_info(&mut self, tx_ac: &str) -> Result<TxIdentityInfo, anyhow::Error>;
fn get_tx_identity_info(&self, tx_ac: &str) -> Result<TxIdentityInfo, anyhow::Error>;

/// Return a single transcript info for supplied accession (tx_ac, alt_ac, alt_aln_method), or None if not found.
///
Expand All @@ -293,7 +294,7 @@ pub trait Interface {
/// * `alt_ac` -- specific genomic sequence (e.g., NC_000011.4)
/// * `alt_aln_method` -- sequence alignment method (e.g., splign, blat)
fn get_tx_info(
&mut self,
&self,
tx_ac: &str,
alt_ac: &str,
alt_aln_method: &str,
Expand All @@ -308,7 +309,7 @@ pub trait Interface {
///
/// * `tx_ac` -- transcript accession with version (e.g., 'NM_000051.3')
fn get_tx_mapping_options(
&mut self,
&self,
tax_ac: &str,
) -> Result<Vec<TxMappingOptionsRecord>, anyhow::Error>;
}
7 changes: 2 additions & 5 deletions src/data/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
///! Datatypes, interfaces, and data access.
mod interface;
mod uta;

pub use interface::*;
pub use uta::*;
pub mod interface;
pub mod uta;
Loading