Skip to content

Commit

Permalink
Fix bug which prevents selection of specific constant chains
Browse files Browse the repository at this point in the history
Signed-off-by: Douwe Schulte <d.schulte@uu.nl>
  • Loading branch information
douweschulte committed Mar 22, 2024
1 parent 0819ec6 commit b002b13
Show file tree
Hide file tree
Showing 35 changed files with 110 additions and 11 deletions.
Binary file modified rustyms/src/imgt/germlines/Alpaca.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Arabian camel.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Atlantic salmon.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Bornean orangutan.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Channel catfish.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Cook's mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Crab-eating macaque.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic bovine.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic cat.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic chicken.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic dog.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic ferret.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic goat.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic horse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic pig.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Domestic sheep.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/House mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Human.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Norway rat.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Platypus.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Rabbit.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Rainbow trout.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Rhesus monkey.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Ring-tailed lemur.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Shrew mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Southern African pygmy mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Spiny mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Sumatran orangutan.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Western European house mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Western gorilla.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Western lowland gorilla.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Western wild mouse.bin
Binary file not shown.
Binary file modified rustyms/src/imgt/germlines/Zebrafish.bin
Binary file not shown.
63 changes: 60 additions & 3 deletions rustyms/src/imgt/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ impl Selection {
.flat_map(|g| g.into_iter().map(|c| (g.species, c.0, c.1)))
.filter(move |(_, kind, _)| self.chains.as_ref().map_or(true, |k| k.contains(kind)))
.flat_map(|(species, _, c)| c.into_iter().map(move |g| (species, g.0, g.1)))
.filter(move |(_, gene, _)| self.genes.as_ref().map_or(true, |s| s.contains(gene)))
.filter(move |(_, gene, _)| {
self.genes
.as_ref()
.map_or(true, |s| contains_gene(s, *gene))
})
.flat_map(|(species, _, germlines)| germlines.iter().map(move |a| (species, a)))
.flat_map(move |(species, germline)| {
germline
Expand All @@ -97,7 +101,11 @@ impl Selection {
.flat_map(|g| g.into_par_iter().map(|c| (g.species, c.0, c.1)))
.filter(move |(_, kind, _)| self.chains.as_ref().map_or(true, |k| k.contains(kind)))
.flat_map(|(species, _, c)| c.into_par_iter().map(move |g| (species, g.0, g.1)))
.filter(move |(_, gene, _)| self.genes.as_ref().map_or(true, |s| s.contains(gene)))
.filter(move |(_, gene, _)| {
self.genes
.as_ref()
.map_or(true, |s| contains_gene(s, *gene))
})
.flat_map(|(species, _, germlines)| {
germlines.into_par_iter().map(move |a| (species, a))
})
Expand All @@ -111,6 +119,10 @@ impl Selection {
}
}

/// Check whether `gene` is accepted by the selection set `s`.
///
/// A gene is accepted when it is literally present in `s`. In addition, any constant gene
/// (`GeneType::C(_)`) is accepted when `s` holds the generic `GeneType::C(None)` entry, so
/// selecting "constant" without naming a specific kind matches every constant gene.
fn contains_gene(s: &HashSet<GeneType>, gene: GeneType) -> bool {
    // Exact membership is always sufficient.
    if s.contains(&gene) {
        return true;
    }
    // Otherwise only constant genes can still match, through the `C(None)` wildcard entry.
    match gene {
        GeneType::C(_) => s.contains(&GeneType::C(None)),
        _ => false,
    }
}

impl Default for Selection {
/// Get a default selection, which gives all kinds and genes but only returns the first allele
fn default() -> Self {
Expand Down Expand Up @@ -220,7 +232,14 @@ impl Germlines {
let genes = match gene.gene {
GeneType::V => &chain.variable,
GeneType::J => &chain.joining,
GeneType::C(_) => &chain.constant,
GeneType::C(None) => &chain.c,
GeneType::C(Some(Constant::A)) => &chain.a,
GeneType::C(Some(Constant::D)) => &chain.d,
GeneType::C(Some(Constant::E)) => &chain.e,
GeneType::C(Some(Constant::G)) => &chain.g,
GeneType::C(Some(Constant::M)) => &chain.m,
GeneType::C(Some(Constant::O)) => &chain.o,
GeneType::C(Some(Constant::T)) => &chain.t,
};
genes
.binary_search_by(|g| g.name.cmp(&gene))
Expand All @@ -245,6 +264,10 @@ impl Germlines {
#[cfg(test)]
#[allow(clippy::missing_panics_doc)]
mod tests {
use std::collections::HashSet;

use crate::imgt::select::contains_gene;

use super::Selection;
use super::{ChainType, GeneType, Species};

Expand All @@ -257,4 +280,38 @@ mod tests {
let first = selection.germlines().next().unwrap();
assert_eq!(first.name(), "IGHV1-2*01");
}

/// Restricting the human heavy chain to the constant G genes must yield `IGG1` as the first
/// germline returned by the selection.
#[test]
fn try_first_g_human() {
    let gamma = GeneType::C(Some(crate::imgt::Constant::G));
    let selection = Selection::default()
        .species([Species::HomoSapiens])
        .chain([ChainType::Heavy])
        .gene([gamma]);
    let mut germlines = selection.germlines();
    let first = germlines.next().unwrap();
    assert_eq!(first.name(), "IGG1");
}

/// `contains_gene` treats `GeneType::C(None)` as a wildcard over every constant gene, while a
/// specific constant gene in the set only matches itself.
#[test]
fn gene_selections() {
    use crate::imgt::Constant;

    let generic = GeneType::C(None);
    let gamma = GeneType::C(Some(Constant::G));
    let alpha = GeneType::C(Some(Constant::A));

    // A set holding only the generic constant entry accepts all constant genes.
    let constant = HashSet::from([generic]);
    assert!(contains_gene(&constant, generic));
    assert!(contains_gene(&constant, gamma));
    assert!(contains_gene(&constant, alpha));

    // A set holding only the G constant accepts G and nothing else.
    let constant_g = HashSet::from([gamma]);
    assert!(!contains_gene(&constant_g, generic));
    assert!(contains_gene(&constant_g, gamma));
    assert!(!contains_gene(&constant_g, alpha));
}
}
58 changes: 50 additions & 8 deletions rustyms/src/imgt/shared/regions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,14 @@ impl<'a> IntoParallelIterator for &'a Germlines {
// Per-chain storage of germline genes, kept as one list per gene type.
// NOTE(review): this span is a rendered diff without +/- markers; `constant` looks like the
// removed field that the per-kind lists `c` .. `t` replace — confirm against the repository.
pub(crate) struct Chain {
/// Genes filed under `GeneType::V`.
pub variable: Vec<Germline>,
/// Genes filed under `GeneType::J`.
pub joining: Vec<Germline>,
/// Genes filed under any `GeneType::C(_)` (single shared list).
pub constant: Vec<Germline>,
/// Genes filed under `GeneType::C(None)`.
pub c: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::A))`.
pub a: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::D))`.
pub d: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::E))`.
pub e: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::G))`.
pub g: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::M))`.
pub m: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::O))`.
pub o: Vec<Germline>,
/// Genes filed under `GeneType::C(Some(Constant::T))`.
pub t: Vec<Germline>,
}

impl Chain {
Expand All @@ -83,7 +90,14 @@ impl Chain {
let db = match &germline.name.gene {
GeneType::V => &mut self.variable,
GeneType::J => &mut self.joining,
GeneType::C(_) => &mut self.constant,
GeneType::C(None) => &mut self.c,
GeneType::C(Some(Constant::A)) => &mut self.a,
GeneType::C(Some(Constant::D)) => &mut self.d,
GeneType::C(Some(Constant::E)) => &mut self.e,
GeneType::C(Some(Constant::G)) => &mut self.g,
GeneType::C(Some(Constant::M)) => &mut self.m,
GeneType::C(Some(Constant::O)) => &mut self.o,
GeneType::C(Some(Constant::T)) => &mut self.t,
};

match db.binary_search_by_key(&germline.name, |g| g.name.clone()) {
Expand Down Expand Up @@ -141,36 +155,64 @@ impl Chain {
self.variable.iter().map(|g| g.alleles.len()).sum::<usize>(),
self.joining.len(),
self.joining.iter().map(|g| g.alleles.len()).sum::<usize>(),
self.constant.len(),
self.constant.iter().map(|g| g.alleles.len()).sum::<usize>(),
self.c.len()
+ self.a.len()
+ self.d.len()
+ self.e.len()
+ self.g.len()
+ self.m.len()
+ self.o.len()
+ self.t.len(),
self.c.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.a.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.d.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.e.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.g.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.m.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.o.iter().map(|g| g.alleles.len()).sum::<usize>()
+ self.t.iter().map(|g| g.alleles.len()).sum::<usize>(),
)
}
}

// Iterate over a chain as `(gene type, germline slice)` pairs, one pair per stored list.
// NOTE(review): this span is a rendered diff without +/- markers; the 3-element `IntoIter` and
// the `self.constant` entry look like the removed lines, the 10-element `IntoIter` and the
// `self.c` .. `self.t` entries like the added ones — confirm against the repository.
impl<'a> IntoIterator for &'a Chain {
type IntoIter = std::array::IntoIter<(GeneType, &'a [Germline]), 3>;
type IntoIter = std::array::IntoIter<(GeneType, &'a [Germline]), 10>;
type Item = (GeneType, &'a [Germline]);

/// Build a fixed-size array of `(GeneType, &[Germline])` pairs and return its by-value
/// iterator; the slices borrow from `self`.
fn into_iter(self) -> Self::IntoIter {
[
(GeneType::V, self.variable.as_slice()),
(GeneType::J, self.joining.as_slice()),
(GeneType::C(None), self.constant.as_slice()),
// Each constant kind is paired with its own list, so a filter on the gene type can
// tell the kinds apart.
(GeneType::C(None), self.c.as_slice()),
(GeneType::C(Some(Constant::A)), self.a.as_slice()),
(GeneType::C(Some(Constant::D)), self.d.as_slice()),
(GeneType::C(Some(Constant::E)), self.e.as_slice()),
(GeneType::C(Some(Constant::G)), self.g.as_slice()),
(GeneType::C(Some(Constant::M)), self.m.as_slice()),
(GeneType::C(Some(Constant::O)), self.o.as_slice()),
(GeneType::C(Some(Constant::T)), self.t.as_slice()),
]
.into_iter()
}
}

#[cfg(feature = "rayon")]
impl<'a> IntoParallelIterator for &'a Chain {
type Iter = rayon::array::IntoIter<(GeneType, &'a [Germline]), 3>;
type Iter = rayon::array::IntoIter<(GeneType, &'a [Germline]), 10>;
type Item = (GeneType, &'a [Germline]);

fn into_par_iter(self) -> Self::Iter {
[
(GeneType::V, self.variable.as_slice()),
(GeneType::J, self.joining.as_slice()),
(GeneType::C(None), self.constant.as_slice()),
(GeneType::C(None), self.c.as_slice()),
(GeneType::C(Some(Constant::A)), self.a.as_slice()),
(GeneType::C(Some(Constant::D)), self.d.as_slice()),
(GeneType::C(Some(Constant::E)), self.e.as_slice()),
(GeneType::C(Some(Constant::G)), self.g.as_slice()),
(GeneType::C(Some(Constant::M)), self.m.as_slice()),
(GeneType::C(Some(Constant::O)), self.o.as_slice()),
(GeneType::C(Some(Constant::T)), self.t.as_slice()),
]
.into_par_iter()
}
Expand Down

0 comments on commit b002b13

Please sign in to comment.