Skip to content

Commit

Permalink
DEV: performance tweaks and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
anuradhawick committed Sep 4, 2024
1 parent 323a255 commit 9bc6bde
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
[package]
name = "rsbio-seq"
version = "0.1.1"
version = "0.1.2"
edition = "2021"
authors = [
"Anuradha Wickramarachchi <anuradhawick@gmail.com>",
"Vijini Mallawaarachchi <viji.mallawaarachchi@gmail.com>",
]
description = "RSBio-Seq is a python wrapper for rust bio crate to provide fast sequence reading."
description = "RSBio-Seq is a fast and light-weight sequence reading library (built on top of rust bio crate)."
readme = "README.md"
license-file = "LICENSE"

Expand Down
31 changes: 20 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,19 @@
[![PyPI - Version](https://img.shields.io/pypi/v/rsbio-seq)](https://pypi.org/project/rsbio-seq/)
[![Upload to PyPI](https://github.com/anuradhawick/rsbio-seq/actions/workflows/pypi.yml/badge.svg)](https://github.com/anuradhawick/rsbio-seq/actions/workflows/pypi.yml)

RSBio intends to provide just reading facility on common sequence formats (FASTA/FASTQ) in both raw and compressed formats.
RSBio-Seq aims to provide just a reading facility for common sequence formats (FASTA/FASTQ), in both raw and compressed form.

## Build and install from source
## Installation

### 1. From PyPI (Recommended)

Simply use the following command:

```bash
pip install rsbio-seq
```

### 2. Build and install from source

To build you need to have the following installed.

Expand All @@ -20,26 +30,21 @@ maturin develop # this installs the development version in the env
maturin develop --release # this installs a release version in the env
```

To build a wheel
To build a wheel for installation

```bash
maturin build --release
```

You will find the `whl` file inside the `target/wheels` directory.

## Install from PyPI
You will find the `whl` file inside the `target/wheels` directory. Its name reflects your Python environment and CPU architecture.

Simply use the following command:

```bash
pip install rsbio-seq
```

## Usage

Once installed, you can import the library and use it as follows.

### Reading

```python
from rsbio_seq import SeqReader, SeqWriter, Sequence

Expand All @@ -51,7 +56,11 @@ for seq in SeqReader("path/to/seq.fasta.gz"):
print(seq.qual)
# optional description attribute
print(seq.desc)
```

### Writing

```python
# writing fasta
seq = Sequence("id", "desc", "ACGT") # id, description, sequence
writer = SeqWriter("out.fasta")
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version", "readme", "description", "license", "authors"]
keywords = ["bioinformatics", "genomics"]

[project.urls]
Documentation = "https://github.com/anuradhawick/rsbio-seq/"
"Bug Tracker" = "https://github.com/anuradhawick/rsbio-seq/issues"
"Source Code" = "https://github.com/anuradhawick/rsbio-seq/"

[tool.maturin]
features = ["pyo3/extension-module"]
58 changes: 28 additions & 30 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::seq::{SeqFormat, Sequence};
use flate2::{write::GzEncoder, Compression};
use std::{fs::File, io::Write};

#[inline]
fn wrap_string_no_whitespace(s: &str, width: usize) -> String {
let mut result = String::with_capacity(s.len() + s.len() / width);
let mut i = 0;
Expand Down Expand Up @@ -47,11 +48,13 @@ impl Writer {
}
}

#[inline]
pub fn write(&mut self, seq: Sequence, wrap: Option<u32>) -> Result<(), String> {
let writer = match &mut self.writer {
WriterType::Gzip(gz) => gz as &mut dyn Write,
WriterType::Plain(file) => file as &mut dyn Write,
};

match self.format {
SeqFormat::Fasta => {
let seq_str = if let Some(wrap) = wrap {
Expand All @@ -62,38 +65,33 @@ impl Writer {
} else {
seq.seq
};
writer.write_all(b">").map_err(|e| e.to_string())?;
writer
.write_all(seq.id.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b" ").map_err(|e| e.to_string())?;
writer
.write_all(seq.desc.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b"\n").map_err(|e| e.to_string())?;
writer
.write_all(seq_str.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b"\n").map_err(|e| e.to_string())?;
let mut buffer =
Vec::with_capacity(4 + seq.id.len() + seq.desc.len() + seq_str.len());

buffer.extend_from_slice(b">");
buffer.extend_from_slice(seq.id.as_bytes());
buffer.extend_from_slice(b" ");
buffer.extend_from_slice(seq.desc.as_bytes());
buffer.extend_from_slice(b"\n");
buffer.extend_from_slice(seq_str.as_bytes());
buffer.extend_from_slice(b"\n");
writer.write_all(&buffer).map_err(|e| e.to_string())?;
}
SeqFormat::Fastq => {
writer.write_all(b"@").map_err(|e| e.to_string())?;
writer
.write_all(seq.id.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b" ").map_err(|e| e.to_string())?;
writer
.write_all(seq.desc.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b"\n").map_err(|e| e.to_string())?;
writer
.write_all(seq.seq.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b"\n+\n").map_err(|e| e.to_string())?;
writer
.write_all(seq.qual.as_bytes())
.map_err(|e| e.to_string())?;
writer.write_all(b"\n").map_err(|e| e.to_string())?;
let mut buffer = Vec::with_capacity(
7 + seq.id.len() + seq.desc.len() + seq.seq.len() + seq.qual.len(),
);

buffer.extend_from_slice(b"@");
buffer.extend_from_slice(seq.id.as_bytes());
buffer.extend_from_slice(b" ");
buffer.extend_from_slice(seq.desc.as_bytes());
buffer.extend_from_slice(b"\n");
buffer.extend_from_slice(seq.seq.as_bytes());
buffer.extend_from_slice(b"\n+\n");
buffer.extend_from_slice(seq.qual.as_bytes());
buffer.extend_from_slice(b"\n");
writer.write_all(&buffer).map_err(|e| e.to_string())?;
}
}
Ok(())
Expand Down

0 comments on commit 9bc6bde

Please sign in to comment.