Skip to content

Commit ae9fe7b

Browse files
authored
Merge pull request #64 from unipept/feature/reference-proteomes-endpoint
Add private_api/proteomes endpoint
2 parents acbf47c + 0462150 commit ae9fe7b

13 files changed

+289
-8
lines changed

.devcontainer/devcontainer.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
// Features to add to the dev container. More info: https://containers.dev/features.
88
"features": {
9-
"https://github.com/unipept/unipept-devcontainers/releases/download/v1.0.0/devcontainer-feature-unipept-index.tgz": {
9+
"https://github.com/unipept/unipept-devcontainers/releases/download/v1.0.1/devcontainer-feature-unipept-index.tgz": {
1010
"version": "latest"
1111
},
1212
"ghcr.io/devcontainers/features/rust:1": {}

Cargo.lock

+3-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/src/controllers/private_api/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ pub mod metadata;
55
pub mod proteins;
66
pub mod taxa;
77
pub mod taxa_filter;
8+
pub mod reference_proteomes;
9+
pub mod reference_proteomes_filter;
810

911
pub fn default_equate_il() -> bool {
1012
false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use axum::{extract::State, Json};
2+
use serde::{Deserialize, Serialize};
3+
use crate::{
4+
controllers::generate_handlers,
5+
AppState
6+
};
7+
8+
#[derive(Serialize, Deserialize)]
9+
pub struct Parameters {
10+
#[serde(default)]
11+
proteomes: Vec<String>
12+
}
13+
14+
#[derive(Serialize)]
15+
pub struct ReferenceProteome {
16+
id: String,
17+
taxon_id: u32,
18+
taxon_name: String,
19+
protein_count: u32
20+
}
21+
22+
async fn handler(
23+
State(AppState { datastore, .. }): State<AppState>,
24+
Parameters { proteomes }: Parameters
25+
) -> Result<Vec<ReferenceProteome>, ()> {
26+
Ok(
27+
proteomes
28+
.iter()
29+
.map(|proteome| proteome.trim())
30+
.filter_map(|proteome| {
31+
datastore.reference_proteome_store().get(proteome).map(|(taxon_id, protein_count)| {
32+
let taxon_name = datastore
33+
.taxon_store()
34+
.get_name(*taxon_id).cloned() // Clone the &String to String
35+
.unwrap_or_else(|| "Unknown".to_string()); // Use `unwrap_or_else` for a default
36+
37+
ReferenceProteome {
38+
id: proteome.to_string(),
39+
taxon_id: *taxon_id,
40+
taxon_name,
41+
protein_count: *protein_count,
42+
}
43+
44+
})
45+
})
46+
.collect()
47+
)
48+
}
49+
50+
// JSON wrapper around `handler`. The routes refer to `get_json_handler` and
// `post_json_handler`, which are presumably generated by this macro (confirm
// against the `generate_handlers!` definition).
generate_handlers!(
    async fn json_handler(
        state => State<AppState>,
        params => Parameters
    ) -> Result<Json<Vec<ReferenceProteome>>, ()> {
        let proteomes = handler(state, params).await?;
        Ok(Json(proteomes))
    }
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
use axum::{extract::State, Json};
2+
use serde::{Deserialize, Serialize};
3+
use crate::{
4+
controllers::generate_handlers,
5+
AppState
6+
};
7+
8+
fn default_filter() -> String {
9+
String::from("")
10+
}
11+
12+
#[derive(Deserialize)]
13+
pub struct ReferenceProteomeCountParameters {
14+
#[serde(default = "default_filter")]
15+
filter: String
16+
}
17+
18+
#[derive(Deserialize)]
19+
pub struct ReferenceProteomeFilterParameters {
20+
#[serde(default = "default_filter")]
21+
filter: String,
22+
start: usize,
23+
end: usize,
24+
#[serde(default)]
25+
sort_by: String, // Can be "id", "name", or "rank"
26+
#[serde(default)]
27+
sort_descending: bool
28+
}
29+
30+
#[derive(Serialize)]
31+
pub struct ReferenceProteomeCountResult {
32+
count: u32
33+
}
34+
35+
fn get_taxon_name_by_id(taxon_store: &datastore::TaxonStore, taxon_id: u32) -> String {
36+
taxon_store
37+
.get_name(taxon_id)
38+
.cloned()
39+
.unwrap_or_else(|| "Unknown".to_string())
40+
}
41+
42+
async fn count_handler(
43+
State(AppState { datastore, .. }): State<AppState>,
44+
ReferenceProteomeCountParameters { filter }: ReferenceProteomeCountParameters
45+
) -> Result<ReferenceProteomeCountResult, ()> {
46+
let proteome_store = datastore.reference_proteome_store();
47+
48+
if filter.is_empty() {
49+
Ok(ReferenceProteomeCountResult {
50+
count: proteome_store.mapper
51+
.values()
52+
.count() as u32
53+
})
54+
} else {
55+
Ok(ReferenceProteomeCountResult {
56+
count: proteome_store.mapper
57+
.iter()
58+
.filter(|(key, (taxon_id, _))| {
59+
let taxon_name = get_taxon_name_by_id(datastore.taxon_store(), *taxon_id);
60+
61+
key.to_lowercase().contains(&filter.to_lowercase()) ||
62+
taxon_id.to_string().contains(&filter) ||
63+
taxon_name.to_lowercase().contains(&filter.to_lowercase())
64+
})
65+
.count() as u32
66+
})
67+
}
68+
}
69+
70+
async fn filter_handler(
71+
State(AppState { datastore, .. }): State<AppState>,
72+
ReferenceProteomeFilterParameters {
73+
filter,
74+
start,
75+
end,
76+
sort_by,
77+
sort_descending
78+
}: ReferenceProteomeFilterParameters
79+
) -> Result<Vec<String>, ()> {
80+
let proteome_store = datastore.reference_proteome_store();
81+
82+
let mut filtered_proteomes: Vec<(&String, &(u32, u32))> = proteome_store.mapper
83+
.iter()
84+
.filter(|(key, (taxon_id, _))| {
85+
let taxon_name = get_taxon_name_by_id(datastore.taxon_store(), *taxon_id);
86+
87+
key.to_lowercase().contains(&filter.to_lowercase()) ||
88+
taxon_id.to_string().contains(&filter) ||
89+
taxon_name.to_lowercase().contains(&filter.to_lowercase())
90+
})
91+
.collect();
92+
93+
// Sort based on the `sort_by` field
94+
match sort_by.as_str() {
95+
"taxon_name" => {
96+
let sort_fn = |a_taxon_id, b_taxon_id| {
97+
let taxon_name_a = get_taxon_name_by_id(datastore.taxon_store(), a_taxon_id);
98+
let taxon_name_b = get_taxon_name_by_id(datastore.taxon_store(), b_taxon_id);
99+
100+
if sort_descending {
101+
taxon_name_b.cmp(&taxon_name_a)
102+
} else {
103+
taxon_name_a.cmp(&taxon_name_b)
104+
}
105+
};
106+
107+
filtered_proteomes.sort_by(|(_, &(a_taxon_id, _)), (_, &(b_taxon_id,_))| {
108+
sort_fn(a_taxon_id, b_taxon_id)
109+
});
110+
},
111+
"protein_count" => {
112+
filtered_proteomes.sort_by(|(_, &(_, a_protein_count)), (_, &(_, b_protein_count))| {
113+
if sort_descending {
114+
b_protein_count.cmp(&a_protein_count)
115+
} else {
116+
a_protein_count.cmp(&b_protein_count)
117+
}
118+
});
119+
},
120+
_ => {
121+
filtered_proteomes.sort_by(|(a_proteome_id, _), (b_proteome_id, _)| {
122+
if sort_descending {
123+
b_proteome_id.cmp(a_proteome_id)
124+
} else {
125+
a_proteome_id.cmp(b_proteome_id)
126+
}
127+
});
128+
},
129+
}
130+
131+
Ok(filtered_proteomes.into_iter().skip(start).take(end - start).map(|(key, _)| key.to_string()).collect())
132+
}
133+
134+
// JSON wrapper around `count_handler`. The routes refer to
// `get_json_count_handler` and `post_json_count_handler`, which are presumably
// generated by this macro (confirm against the `generate_handlers!` definition).
generate_handlers!(
    async fn json_count_handler(
        state => State<AppState>,
        params => ReferenceProteomeCountParameters
    ) -> Result<Json<ReferenceProteomeCountResult>, ()> {
        let count = count_handler(state, params).await?;
        Ok(Json(count))
    }
);
142+
143+
// JSON wrapper around `filter_handler`. The routes refer to
// `get_json_filter_handler` and `post_json_filter_handler`, which are presumably
// generated by this macro (confirm against the `generate_handlers!` definition).
generate_handlers!(
    async fn json_filter_handler(
        state => State<AppState>,
        params => ReferenceProteomeFilterParameters
    ) -> Result<Json<Vec<String>>, ()> {
        let proteome_ids = filter_handler(state, params).await?;
        Ok(Json(proteome_ids))
    }
);

api/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub async fn start(index_location: &str, database_address: &str, port: u32) -> R
2828
let ec_numbers = format!("{}/datastore/ec_numbers.tsv", index_location);
2929
let go_terms = format!("{}/datastore/go_terms.tsv", index_location);
3030
let interpro_entries = format!("{}/datastore/interpro_entries.tsv", index_location);
31+
let reference_proteomes = format!("{}/datastore/reference_proteomes.tsv", index_location);
3132
let lineages = format!("{}/datastore/lineages.tsv", index_location);
3233
let taxons = format!("{}/datastore/taxons.tsv", index_location);
3334

@@ -42,6 +43,7 @@ pub async fn start(index_location: &str, database_address: &str, port: u32) -> R
4243
&ec_numbers,
4344
&go_terms,
4445
&interpro_entries,
46+
&reference_proteomes,
4547
&lineages,
4648
&taxons
4749
)?;

api/src/routes.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{
1414
},
1515
datasets::sampledata,
1616
mpa::{pept2data},
17-
private_api::{ecnumbers, goterms, interpros, metadata, proteins, taxa, taxa_filter}
17+
private_api::{ecnumbers, goterms, interpros, metadata, proteins, reference_proteomes, reference_proteomes_filter, taxa, taxa_filter}
1818
},
1919
middleware::{
2020
cors::create_cors_layer,
@@ -148,6 +148,12 @@ fn create_private_api_routes() -> Router<AppState> {
148148
get(metadata::get_json_handler).post(metadata::post_json_handler),
149149
"/proteins",
150150
get(proteins::get_json_handler).post(proteins::post_json_handler),
151+
"/proteomes",
152+
get(reference_proteomes::get_json_handler).post(reference_proteomes::post_json_handler),
153+
"/proteomes/count",
154+
get(reference_proteomes_filter::get_json_count_handler).post(reference_proteomes_filter::post_json_count_handler),
155+
"/proteomes/filter",
156+
get(reference_proteomes_filter::get_json_filter_handler).post(reference_proteomes_filter::post_json_filter_handler),
151157
"/taxa",
152158
get(taxa::get_json_handler).post(taxa::post_json_handler),
153159
"/taxa/count",

database/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "database"
3-
version = "0.1.0"
3+
version = "2.3.2"
44
edition = "2021"
55

66
[dependencies]

datastore/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "datastore"
3-
version = "0.1.0"
3+
version = "2.3.2"
44
edition = "2021"
55

66
[dependencies]

datastore/src/errors.rs

+12
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ pub enum DataStoreError {
1212
GoStoreError(#[from] GoStoreError),
1313
#[error("Interpro store error: {0}")]
1414
InterproStoreError(#[from] InterproStoreError),
15+
#[error("Reference proteome store error: {0}")]
16+
ReferenceProteomeStoreError(#[from] ReferenceProteomeStoreError),
1517
#[error("Lineage store error: {0}")]
1618
LineageStoreError(#[from] LineageStoreError),
1719
#[error("Taxon store error: {0}")]
@@ -52,6 +54,16 @@ pub enum InterproStoreError {
5254
FileNotFound(String)
5355
}
5456

57+
#[derive(Error, Debug)]
58+
pub enum ReferenceProteomeStoreError {
59+
#[error("{0}")]
60+
IoError(#[from] std::io::Error),
61+
#[error("File not found: {0}")]
62+
FileNotFound(String),
63+
#[error("Error while parsing: {0}")]
64+
ParseError(String),
65+
}
66+
5567
#[derive(Error, Debug)]
5668
pub enum LineageStoreError {
5769
#[error("{0}")]

datastore/src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ mod interpro_store;
55
mod lineage_store;
66
mod sample_store;
77
mod taxon_store;
8+
mod reference_proteome_store;
89

910
pub use ec_store::EcStore;
1011
pub use errors::DataStoreError;
@@ -13,24 +14,28 @@ pub use interpro_store::InterproStore;
1314
pub use lineage_store::{Lineage, LineageStore};
1415
pub use sample_store::SampleStore;
1516
pub use taxon_store::{LineageRank, TaxonStore};
17+
pub use reference_proteome_store::ReferenceProteomeStore;
1618

1719
pub struct DataStore {
1820
version: String,
1921
sample_store: SampleStore,
2022
ec_store: EcStore,
2123
go_store: GoStore,
2224
interpro_store: InterproStore,
25+
reference_proteome_store: ReferenceProteomeStore,
2326
lineage_store: LineageStore,
2427
taxon_store: TaxonStore
2528
}
2629

2730
impl DataStore {
31+
#[allow(clippy::too_many_arguments)]
2832
pub fn try_from_files(
2933
version_file: &str,
3034
sample_file: &str,
3135
ec_file: &str,
3236
go_file: &str,
3337
interpro_file: &str,
38+
reference_proteome_file: &str,
3439
lineage_file: &str,
3540
taxon_file: &str
3641
) -> Result<Self, DataStoreError> {
@@ -43,6 +48,7 @@ impl DataStore {
4348
ec_store: EcStore::try_from_file(ec_file)?,
4449
go_store: GoStore::try_from_file(go_file)?,
4550
interpro_store: InterproStore::try_from_file(interpro_file)?,
51+
reference_proteome_store: ReferenceProteomeStore::try_from_file(reference_proteome_file)?,
4652
lineage_store: LineageStore::try_from_file(lineage_file)?,
4753
taxon_store: TaxonStore::try_from_file(taxon_file)?
4854
})
@@ -67,6 +73,8 @@ impl DataStore {
6773
pub fn interpro_store(&self) -> &InterproStore {
6874
&self.interpro_store
6975
}
76+
77+
/// Returns a shared reference to the reference proteome store.
pub fn reference_proteome_store(&self) -> &ReferenceProteomeStore {
    &self.reference_proteome_store
}
7078

7179
pub fn lineage_store(&self) -> &LineageStore {
7280
&self.lineage_store

0 commit comments

Comments
 (0)