Skip to content

Commit

Permalink
simd and benches
Browse files Browse the repository at this point in the history
  • Loading branch information
wang-q committed Sep 27, 2024
1 parent c1f5d03 commit 0c0ff77
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 46 deletions.
38 changes: 31 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,24 +141,24 @@ nwr common "Escherichia coli" 4932 Drosophila_melanogaster 9606 "Mus musculus"

```shell
# Concurrent tests may trigger sqlite locking
cargo test -- --test-threads=1
cargo +nightly test -- --test-threads=1

cargo test --color=always --package nwr --test cli_nwr command_template -- --show-output
cargo +nightly test --color=always --package nwr --test cli_nwr command_template -- --show-output

# rustup update -- nightly
cargo +nightly bench --bench simd

# debug mode has a slow connection
cargo run --release --bin nwr download
cargo +nightly run --release --bin nwr download

# tests/nwr/
cargo run --bin nwr txdb -d tests/nwr/
cargo +nightly run --bin nwr txdb -d tests/nwr/

cargo run --bin nwr info -d tests/nwr/ --tsv Viruses "Actinophage JHJ-1" "Bacillus phage bg1"
cargo +nightly run --bin nwr info -d tests/nwr/ --tsv Viruses "Actinophage JHJ-1" "Bacillus phage bg1"

cargo run --bin nwr common -d tests/nwr/ "Actinophage JHJ-1" "Bacillus phage bg1"
cargo +nightly run --bin nwr common -d tests/nwr/ "Actinophage JHJ-1" "Bacillus phage bg1"

cargo run --bin nwr template tests/assembly/Trichoderma.assembly.tsv --ass -o stdout
cargo +nightly run --bin nwr template tests/assembly/Trichoderma.assembly.tsv --ass -o stdout

```

Expand All @@ -171,8 +171,32 @@ cargo run --bin nwr similarity tests/assembly/domain.tsv --mode cosine --bin

cargo run --bin nwr similarity tests/assembly/domain.tsv --mode jaccard --bin

hyperfine --warmup 1 \
-n p1 \
'nwr similarity data/Domian_content_1000.tsv --parallel 1 --mode jaccard --bin > /dev/null' \
-n p2 \
'nwr similarity data/Domian_content_1000.tsv --parallel 2 --mode jaccard --bin > /dev/null' \
-n p3 \
'nwr similarity data/Domian_content_1000.tsv --parallel 3 --mode jaccard --bin > /dev/null' \
-n p4 \
'nwr similarity data/Domian_content_1000.tsv --parallel 4 --mode jaccard --bin > /dev/null' \
-n p6 \
'nwr similarity data/Domian_content_1000.tsv --parallel 6 --mode jaccard --bin > /dev/null' \
-n p8 \
'nwr similarity data/Domian_content_1000.tsv --parallel 8 --mode jaccard --bin > /dev/null' \
--export-markdown sim.md.tmp

```

| Command | Mean [s] | Min [s] | Max [s] | Relative |
|:--------|---------------:|--------:|--------:|------------:|
| `p1` | 17.364 ± 1.244 | 16.065 | 20.367 | 2.11 ± 0.18 |
| `p2` | 10.467 ± 1.405 | 9.421 | 14.045 | 1.27 ± 0.18 |
| `p3` | 8.226 ± 0.380 | 7.615 | 8.722 | 1.00 |
| `p4` | 8.430 ± 0.842 | 7.777 | 10.614 | 1.02 ± 0.11 |
| `p6` | 8.330 ± 0.827 | 7.648 | 10.371 | 1.01 ± 0.11 |
| `p8` | 10.268 ± 2.407 | 8.415 | 15.486 | 1.25 ± 0.30 |

### Newick files and LaTeX

For more detailed usages, check [this file](tree/README.md).
Expand Down
125 changes: 95 additions & 30 deletions src/cmd_nwr/similarity.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use clap::*;
use std::io::BufRead;
use std::simd::prelude::*;

const LANES: usize = 8; // 32 * 8 = 256, AVX2

// Create clap subcommand arguments
pub fn make_subcommand() -> Command {
Expand Down Expand Up @@ -121,7 +124,7 @@ pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
};

// Channel 1 - Entries
let (snd1, rcv1) = crossbeam::channel::bounded::<(nwr::AsmEntry, nwr::AsmEntry)>(10);
let (snd1, rcv1) = crossbeam::channel::bounded::<(&nwr::AsmEntry, &nwr::AsmEntry)>(10);
// Channel 2 - Results
let (snd2, rcv2) = crossbeam::channel::bounded::<String>(10);

Expand All @@ -132,7 +135,7 @@ pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
s.spawn(|_| {
for e1 in &entries {
for e2 in &others {
snd1.send((e1.clone(), e2.clone())).unwrap();
snd1.send((e1, e2)).unwrap();
}
}
// Close the channel - this is necessary to exit the for-loop in the worker
Expand Down Expand Up @@ -184,15 +187,15 @@ fn load_file(infile: &str, is_bin: bool) -> Vec<nwr::AsmEntry> {
.list()
.iter()
.map(|e| if *e > 0.0 { 1.0 } else { 0.0 })
.collect::<Vec<f64>>();
.collect::<Vec<f32>>();
entry = nwr::AsmEntry::from(entry.name(), &bin_list);
}
entries.push(entry);
}
entries
}

fn calc(l1: &[f64], l2: &[f64], mode: &str, is_sim: bool, is_dis: bool) -> f64 {
fn calc(l1: &[f32], l2: &[f32], mode: &str, is_sim: bool, is_dis: bool) -> f32 {
let mut score = match mode {
"euclid" => euclidean_distance(l1, l2),
"cosine" => cosine_similarity(l1, l2),
Expand All @@ -212,25 +215,61 @@ fn calc(l1: &[f64], l2: &[f64], mode: &str, is_sim: bool, is_dis: bool) -> f64 {

// https://www.maartengrootendorst.com/blog/distances/
// https://crates.io/crates/semanticsimilarity_rs
fn euclidean_distance(v1: &[f64], v2: &[f64]) -> f64 {
v1.iter()
.zip(v2.iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f64>()
.sqrt()
fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
let (a_extra, a_chunks): (&[f32], &[[f32; LANES]]) = a.as_rchunks();
let (b_extra, b_chunks): (&[f32], &[[f32; LANES]]) = b.as_rchunks();

let mut sums = [0.0; LANES];
for ((x, y), d) in std::iter::zip(a_extra, b_extra).zip(&mut sums) {
let diff = x - y;
*d = diff * diff;
}

let mut sums = f32x8::from_array(sums);
std::iter::zip(a_chunks, b_chunks).for_each(|(x, y)| {
let diff = f32x8::from_array(*x) - f32x8::from_array(*y);
sums += diff * diff;
});

sums.reduce_sum().sqrt()
}

fn dot_product(v1: &[f64], v2: &[f64]) -> f64 {
v1.iter().zip(v2.iter()).map(|(a, b)| a * b).sum()
fn dot_product(a: &[f32], b: &[f32]) -> f32 {
let (a_extra, a_chunks): (&[f32], &[[f32; LANES]]) = a.as_rchunks();
let (b_extra, b_chunks): (&[f32], &[[f32; LANES]]) = b.as_rchunks();

let mut sums = [0.0; LANES];
for ((x, y), d) in std::iter::zip(a_extra, b_extra).zip(&mut sums) {
*d = x * y;
}

let mut sums = f32x8::from_array(sums);
std::iter::zip(a_chunks, b_chunks).for_each(|(x, y)| {
sums += f32x8::from_array(*x) * f32x8::from_array(*y);
});

sums.reduce_sum()
}

fn norm(v1: &[f64]) -> f64 {
v1.iter().map(|x| x.powi(2)).sum::<f64>().sqrt()
fn norm(a: &[f32]) -> f32 {
let (a_extra, a_chunks): (&[f32], &[[f32; LANES]]) = a.as_rchunks();

let mut sums = [0.0; LANES];
for (x, d) in std::iter::zip(a_extra, &mut sums) {
*d = x * x;
}

let mut sums = f32x8::from_array(sums);
a_chunks.into_iter().for_each(|x| {
sums += f32x8::from_array(*x) * f32x8::from_array(*x);
});

sums.reduce_sum().sqrt()
}

fn cosine_similarity(v1: &[f64], v2: &[f64]) -> f64 {
let dot_product = dot_product(v1, v2);
let denominator = norm(v1) * norm(v2);
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
let dot_product = dot_product(a, b);
let denominator = norm(a) * norm(b);

if denominator == 0.0 {
0.0
Expand All @@ -239,17 +278,43 @@ fn cosine_similarity(v1: &[f64], v2: &[f64]) -> f64 {
}
}

fn weighted_jaccard_similarity(v1: &[f64], v2: &[f64]) -> f64 {
let numerator = v1
.iter()
.zip(v2.iter())
.map(|(a, b)| f64::min(*a, *b))
.sum::<f64>();
let denominator = v1
.iter()
.zip(v2.iter())
.map(|(a, b)| f64::max(*a, *b))
.sum::<f64>();
fn jaccard_intersection(a: &[f32], b: &[f32]) -> f32 {
let (a_extra, a_chunks): (&[f32], &[[f32; LANES]]) = a.as_rchunks();
let (b_extra, b_chunks): (&[f32], &[[f32; LANES]]) = b.as_rchunks();

let mut sums = [0.0; LANES];
for ((x, y), d) in std::iter::zip(a_extra, b_extra).zip(&mut sums) {
*d = f32::min(*x, *y);
}

let mut sums = f32x8::from_array(sums);
std::iter::zip(a_chunks, b_chunks).for_each(|(x, y)| {
sums += f32x8::simd_min( f32x8::from_array(*x), f32x8::from_array(*y));
});

sums.reduce_sum()
}

fn jaccard_union(a: &[f32], b: &[f32]) -> f32 {
let (a_extra, a_chunks): (&[f32], &[[f32; LANES]]) = a.as_rchunks();
let (b_extra, b_chunks): (&[f32], &[[f32; LANES]]) = b.as_rchunks();

let mut sums = [0.0; LANES];
for ((x, y), d) in std::iter::zip(a_extra, b_extra).zip(&mut sums) {
*d = f32::max(*x, *y);
}

let mut sums = f32x8::from_array(sums);
std::iter::zip(a_chunks, b_chunks).for_each(|(x, y)| {
sums += f32x8::simd_max( f32x8::from_array(*x), f32x8::from_array(*y));
});

sums.reduce_sum()
}

fn weighted_jaccard_similarity(a: &[f32], b: &[f32]) -> f32 {
let numerator = jaccard_intersection(a, b);
let denominator = jaccard_union(a, b);

if denominator == 0.0 {
0.0
Expand All @@ -261,10 +326,10 @@ fn weighted_jaccard_similarity(v1: &[f64], v2: &[f64]) -> f64 {
// Schölkopf, B. (2000). The kernel trick for distances. In Neural Information Processing Systems, pages 301-307.
// https://stats.stackexchange.com/questions/146309/turn-a-distance-measure-into-a-kernel-function
// https://stats.stackexchange.com/questions/158279/how-i-can-convert-distance-euclidean-to-similarity-score
fn d2s(dist: f64) -> f64 {
fn d2s(dist: f32) -> f32 {
1.0 / dist.abs().exp()
}

fn dis(dist: f64) -> f64 {
fn dis(dist: f32) -> f32 {
1.0 - dist
}
18 changes: 9 additions & 9 deletions src/libs/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,15 @@ fn format_node(node: &Node) -> String {
#[derive(Default, Clone)]
pub struct AsmEntry {
name: String,
list: Vec<f64>,
list: Vec<f32>,
}

impl AsmEntry {
// Immutable accessors
pub fn name(&self) -> &String {
&self.name
}
pub fn list(&self) -> &Vec<f64> {
pub fn list(&self) -> &Vec<f32> {
&self.list
}

Expand All @@ -124,12 +124,12 @@ impl AsmEntry {
/// ```
/// # use nwr::AsmEntry;
/// let name = "Es_coli_005008_GCF_013426115_1".to_string();
/// let list : Vec<f64> = vec![1.0,5.0,2.0,7.0,6.0,6.0];
/// let list : Vec<f32> = vec![1.0,5.0,2.0,7.0,6.0,6.0];
/// let entry = AsmEntry::from(&name, &list);
/// # assert_eq!(*entry.name(), "Es_coli_005008_GCF_013426115_1");
/// # assert_eq!(*entry.list().get(1).unwrap(), 5f64);
/// # assert_eq!(*entry.list().get(1).unwrap(), 5f32);
/// ```
pub fn from(name: &String, vector: &[f64]) -> Self {
pub fn from(name: &String, vector: &[f32]) -> Self {
Self {
name: name.clone(),
list: Vec::from(vector),
Expand All @@ -141,15 +141,15 @@ impl AsmEntry {
/// let line = "Es_coli_005008_GCF_013426115_1\t1,5,2,7,6,6".to_string();
/// let entry = AsmEntry::parse(&line);
/// # assert_eq!(*entry.name(), "Es_coli_005008_GCF_013426115_1");
/// # assert_eq!(*entry.list().get(1).unwrap(), 5f64);
/// # assert_eq!(*entry.list().get(1).unwrap(), 5f32);
/// ```
pub fn parse(line: &str) -> AsmEntry {
let fields: Vec<&str> = line.split('\t').collect();
if fields.len() == 2 {
let name = fields[0].to_string();
let parts: Vec<&str> = fields[1].split(',').collect();
let list: Vec<f64> =
parts.iter().map(|e| e.parse::<f64>().unwrap()).collect();
let list: Vec<f32> =
parts.iter().map(|e| e.parse::<f32>().unwrap()).collect();
Self::from(&name, &list)
} else {
Self::new()
Expand All @@ -163,7 +163,7 @@ impl fmt::Display for AsmEntry {
/// ```
/// # use nwr::AsmEntry;
/// let name = "Es_coli_005008_GCF_013426115_1".to_string();
/// let list : Vec<f64> = vec![1.0,5.0,2.0,7.0,6.0,6.0];
/// let list : Vec<f32> = vec![1.0,5.0,2.0,7.0,6.0,6.0];
/// let entry = AsmEntry::from(&name, &list);
/// assert_eq!(entry.to_string(), "Es_coli_005008_GCF_013426115_1\t1,5,2,7,6,6\n");
/// ```
Expand Down
5 changes: 5 additions & 0 deletions src/nwr.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#![feature(array_chunks)]
#![feature(slice_as_chunks)]
// Add these imports to use the stdsimd library
#![feature(portable_simd)]

extern crate clap;

use clap::*;
Expand Down

0 comments on commit 0c0ff77

Please sign in to comment.