Skip to content

Commit

Permalink
Merge branch 'main' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede authored Aug 7, 2024
2 parents 2933649 + d7b203f commit 7de33e5
Show file tree
Hide file tree
Showing 14 changed files with 275 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Authors@R: c(
comment = c(ORCID = "0000-0003-3389-2191"))
)
Maintainer: Adriano Rutz <rutz@imsb.biol.ethz.ch>
Description: TIMA provides the infrastructure to perform Taxonomically
Description: This package provides the infrastructure to perform Taxonomically
Informed Metabolite Annotation.
License: GPL (>= 3)
URL: https://github.com/taxonomicallyinformedannotation/tima,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ importFrom(tidytable,unnest)
importFrom(tidytable,where)
importFrom(utils,URLencode)
importFrom(utils,combn)
importFrom(utils,globalVariables)
importFrom(utils,unzip)
importFrom(yaml,read_yaml)
importFrom(yaml,write_yaml)
8 changes: 4 additions & 4 deletions R/clean_bio.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ clean_bio <-
candidate_structure_tax_cla_03cla,
candidate_structure_tax_npc_03cla,
candidate_structure_tax_cla_04dirpar,
score_pondered_bio,
score_weighted_bio,
.keep_all = TRUE
)

Expand All @@ -95,7 +95,7 @@ clean_bio <-
candidate_structure_tax_cla_03cla,
candidate_structure_tax_npc_03cla,
candidate_structure_tax_cla_04dirpar,
score_pondered_bio
score_weighted_bio
),
by = setNames("feature_id", "feature_target")
) |>
Expand All @@ -112,7 +112,7 @@ clean_bio <-
distinct(
feature_source,
feature_target, !!as.name(candidates),
score_pondered_bio
score_weighted_bio
) |>
mutate(
count = n_distinct(feature_target),
Expand All @@ -126,7 +126,7 @@ clean_bio <-
distinct(feature_source, !!as.name(candidates), .keep_all = TRUE) |>
mutate(
!!as.name(feature_score_name) :=
!!as.name(consistency_name) * score_pondered_bio,
!!as.name(consistency_name) * score_weighted_bio,
.by = c(feature_source, !!as.name(candidates))
) |>
arrange(-!!as.name(feature_score_name)) |>
Expand Down
8 changes: 4 additions & 4 deletions R/clean_chemo.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,14 @@ clean_chemo <-
}

df1 <- df1 |>
arrange(desc(score_pondered_chemo)) |>
arrange(desc(score_weighted_chemo)) |>
distinct(feature_id,
candidate_structure_inchikey_no_stereo,
.keep_all = TRUE
) |>
mutate(
rank_initial = dense_rank(-candidate_score_pseudo_initial),
rank_final = dense_rank(-score_pondered_chemo),
rank_final = dense_rank(-score_weighted_chemo),
.by = c(feature_id)
) |>
filter(rank_final <= candidates_final)
Expand Down Expand Up @@ -176,9 +176,9 @@ clean_chemo <-
model$rank_columns,
"score_initial" = "candidate_score_pseudo_initial",
"score_biological",
"score_interim" = "score_pondered_bio",
"score_interim" = "score_weighted_bio",
"score_chemical",
"score_final" = "score_pondered_chemo"
"score_final" = "score_weighted_chemo"
)
)) |>
distinct() |>
Expand Down
253 changes: 252 additions & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
@@ -1 +1,252 @@
utils::globalVariables(c(":=", "!!"))
import::from(utils, globalVariables, .into = environment())

# Register every name listed below with utils::globalVariables().
# Per the utils documentation, this tells R CMD check that these names are
# used deliberately without a visible binding, so they are not reported as
# undefined global variables.
# NOTE(review): the entries look like data-frame column names referenced
# unquoted inside data-masking verbs (mutate(), distinct(), arrange(), ...)
# elsewhere in the package -- confirm when adding or removing entries.
# The list is kept roughly alphabetical; a column that is renamed in the
# package code must be renamed here as well.
#' @importFrom utils globalVariables
globalVariables(
c(
# ":=" and "!!" are operators rather than column names; presumably listed
# because they are used in tidy-eval expressions -- TODO confirm.
":=",
"!!",
"adduct",
"adduct.x",
"adduct.y",
"adduct_dest",
"candidate_adduct",
"candidate_count_similarity_peaks_matched",
"candidate_library",
"candidate_organism_01_domain",
"candidate_organism_02_kingdom",
"candidate_organism_03_phylum",
"candidate_organism_04_class",
"candidate_organism_05_order",
"candidate_organism_06_family",
"candidate_organism_07_tribe",
"candidate_organism_08_genus",
"candidate_organism_09_species",
"candidate_organism_10_varietas",
"candidate_score_pseudo_initial",
"candidate_score_similarity",
"candidate_score_sirius_csi",
"candidate_score_sirius_csi_tmp",
"candidate_spectrum_entropy",
"candidate_structure_error_mz",
"candidate_structure_error_rt",
"candidate_structure_exact_mass",
"candidate_structure_exact_mass_i",
"candidate_structure_exact_mass_s",
"candidate_structure_inchi",
"candidate_structure_inchikey_no_stereo",
"candidate_structure_inchikey_no_stereo_i",
"candidate_structure_inchikey_no_stereo_s",
"candidate_structure_molecular_formula",
"candidate_structure_molecular_formula_i",
"candidate_structure_molecular_formula_s",
"candidate_structure_name",
"candidate_structure_name_i",
"candidate_structure_name_s",
"candidate_structure_organism_occurrence_reference",
"candidate_structure_smiles_no_stereo",
"candidate_structure_smiles_no_stereo_i",
"candidate_structure_smiles_no_stereo_s",
"candidate_structure_tax_cla_chemontid",
"candidate_structure_tax_cla_chemontid_i",
"candidate_structure_tax_cla_chemontid_s",
"candidate_structure_tax_cla_01kin",
"candidate_structure_tax_cla_01kin_i",
"candidate_structure_tax_cla_01kin_s",
"candidate_structure_tax_cla_02sup",
"candidate_structure_tax_cla_02sup_i",
"candidate_structure_tax_cla_02sup_s",
"candidate_structure_tax_cla_03cla",
"candidate_structure_tax_cla_03cla_i",
"candidate_structure_tax_cla_03cla_s",
"candidate_structure_tax_cla_04dirpar",
"candidate_structure_tax_cla_04dirpar_i",
"candidate_structure_tax_cla_04dirpar_s",
"candidate_structure_tax_npc_01pat",
"candidate_structure_tax_npc_01pat_i",
"candidate_structure_tax_npc_01pat_s",
"candidate_structure_tax_npc_02sup",
"candidate_structure_tax_npc_02sup_i",
"candidate_structure_tax_npc_02sup_s",
"candidate_structure_tax_npc_03cla",
"candidate_structure_tax_npc_03cla_i",
"candidate_structure_tax_npc_03cla_s",
"candidate_structure_xlogp",
"candidate_structure_xlogp_i",
"candidate_structure_xlogp_s",
"canonical_name",
"cluster",
"cluster index",
"ComponentIndex",
"componentindex",
"compound_id",
"consistency_structure_cla_cla",
"consistency_structure_cla_kin",
"consistency_structure_cla_par",
"consistency_structure_cla_sup",
"consistency_structure_npc_cla",
"consistency_structure_npc_pat",
"consistency_structure_npc_sup",
"delta_max",
"delta_min",
"Distance",
"error_mz",
"exactmass",
"exact_mass",
"feature_id",
"feature_id.x",
"feature_id.y",
"feature_id_dest",
"feature_pred_tax_cla_01kin_val",
"feature_pred_tax_cla_01kin_score",
"feature_pred_tax_cla_02sup_val",
"feature_pred_tax_cla_02sup_score",
"feature_pred_tax_cla_03cla_val",
"feature_pred_tax_cla_03cla_score",
"feature_pred_tax_cla_04dirpar_val",
"feature_pred_tax_cla_04dirpar_score",
"feature_pred_tax_npc_01pat_val",
"feature_pred_tax_npc_01pat_score",
"feature_pred_tax_npc_02sup_val",
"feature_pred_tax_npc_02sup_score",
"feature_pred_tax_npc_03cla_val",
"feature_pred_tax_npc_03cla_score",
"feature_source",
"feature_spectrum_entropy",
"feature_spectrum_peaks",
"feature_target",
"filename",
"formula",
"Group1",
"Group2",
"id",
"inchikey",
"inchikey_no_stereo",
"intensity",
"ionMass",
"Item1",
"Item2",
"l",
"label",
"Label",
"library_name",
"loss",
"mappingFeatureId",
"mass",
"mass_max",
"mass_min",
"massErrorPrecursor(ppm)",
"moldb_formula",
"moldb_inchikey",
"moldb_logp",
"moldb_mono_mass",
"moldb_smiles",
"mz",
"mz.x",
"mz.y",
"mz_dest",
"MZErrorPPM",
"n",
"name",
"on",
"organism",
"organism_name",
"organism_taxonomy_01domain",
"organism_taxonomy_02kingdom",
"organism_taxonomy_03phylum",
"organism_taxonomy_04class",
"organism_taxonomy_05order",
"organism_taxonomy_06family",
"organism_taxonomy_07tribe",
"organism_taxonomy_08genus",
"organism_taxonomy_09species",
"organism_taxonomy_10varietas",
"organism_taxonomy_ottid",
"ott_id",
"output",
"precursorMz",
"Precursor_MZ",
"rank_final",
"reference_doi",
"rowname",
"rt",
"rt_max",
"rt_min",
"rt_target",
"rt.x",
"sample_organism_01_domain",
"sample_organism_02_kingdom",
"sample_organism_03_phylum",
"sample_organism_04_class",
"sample_organism_05_order",
"sample_organism_06_family",
"sample_organism_07_tribe",
"sample_organism_08_genus",
"sample_organism_09_species",
"sample_organism_10_varietas",
"score_biological",
"score_biological_01",
"score_biological_02",
"score_biological_03",
"score_biological_04",
"score_biological_05",
"score_biological_06",
"score_biological_07",
"score_biological_08",
"score_biological_09",
"score_biological_10",
"score_chemical",
"score_chemical_1",
"score_chemical_2",
"score_chemical_3",
"score_chemical_4",
"score_chemical_5",
"score_chemical_6",
"score_chemical_7",
"score_weighted_bio",
"score_weighted_chemo",
"search_string",
"selection",
"smiles",
"smiles_no_stereo",
"spectrum_id",
"structure_exact_mass",
"structure_inchikey",
"structure_inchikey_2D",
"structure_inchikey_no_stereo",
"structure_molecular_formula",
"structure_name",
"structure_nameTraditional",
"structure_smiles",
"structure_smiles_2D",
"structure_smiles_no_stereo",
"structure_tax_cla_01kin",
"structure_tax_cla_02sup",
"structure_tax_cla_03cla",
"structure_tax_cla_04dirpar",
"structure_tax_cla_chemontid",
"structure_tax_npc_01pat",
"structure_tax_npc_02sup",
"structure_tax_npc_03cla",
"structure_taxonomy_classyfire_chemontid",
"structure_taxonomy_classyfire_01kingdom",
"structure_taxonomy_classyfire_02superclass",
"structure_taxonomy_classyfire_03class",
"structure_taxonomy_classyfire_04directparent",
"structure_taxonomy_npclassifier_01pathway",
"structure_taxonomy_npclassifier_02superclass",
"structure_taxonomy_npclassifier_03class",
"structure_xlogp",
"target_id",
"target_inchikey",
"target_inchikey_no_stereo",
"target_precursorMz",
"target_smiles",
"target_smiles_no_stereo",
"type",
"unique_name",
"val",
"value",
"value_max",
"value_min"
)
)
2 changes: 2 additions & 0 deletions R/go_to_cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import::from(fs, path_home, .into = environment())
#' @importFrom fs dir_create
#' @importFrom fs path_home
#'
#' @param dir Directory
#'
#' @return Goes to cache
#'
#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/weight_bio.R
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ weight_bio <-
) |>
select(-candidate_score_sirius_csi_tmp) |>
mutate(
score_pondered_bio = (1 / (weight_biological + weight_spectral)) * weight_biological * score_biological + (1 / (weight_biological + weight_spectral)) * weight_spectral * candidate_score_pseudo_initial
score_weighted_bio = (1 / (weight_biological + weight_spectral)) * weight_biological * score_biological + (1 / (weight_biological + weight_spectral)) * weight_spectral * candidate_score_pseudo_initial
)

rm(
Expand Down
2 changes: 1 addition & 1 deletion R/weight_chemo.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ weight_chemo <-
right_join(annot_table_wei_bio_clean) |>
log_pipe("... calculating weighted chemical score \n") |>
mutate(
score_pondered_chemo = (1 / (
score_weighted_chemo = (1 / (
weight_chemical + weight_biological + weight_spectral
)) * weight_chemical * score_chemical + (1 / (
weight_chemical + weight_biological + weight_spectral
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ knitr::opts_chunk$set(
[![R-CMD-check](https://github.com/taxonomicallyinformedannotation/tima/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/taxonomicallyinformedannotation/tima/actions/workflows/R-CMD-check.yaml)
[![R-Universe](https://taxonomicallyinformedannotation.r-universe.dev/badges/tima)](https://taxonomicallyinformedannotation.r-universe.dev/tima)
[![Codecov test coverage](https://codecov.io/gh/taxonomicallyinformedannotation/tima/graph/badge.svg)](https://app.codecov.io/gh/taxonomicallyinformedannotation/tima)
[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima/)
[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima-r/)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5797920.svg)](https://doi.org/10.5281/zenodo.5797920)
<!-- badges: end -->

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ status](https://www.r-pkg.org/badges/version/tima)](https://CRAN.R-project.org/p
[![R-Universe](https://taxonomicallyinformedannotation.r-universe.dev/badges/tima)](https://taxonomicallyinformedannotation.r-universe.dev/tima)
[![Codecov test
coverage](https://codecov.io/gh/taxonomicallyinformedannotation/tima/graph/badge.svg)](https://app.codecov.io/gh/taxonomicallyinformedannotation/tima)
[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima/)
[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima-r/)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5797920.svg)](https://doi.org/10.5281/zenodo.5797920)
<!-- badges: end -->

Expand Down
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"@type": "SoftwareSourceCode",
"identifier": "tima",
"description": "TIMA provides the infrastructure to perform Taxonomically Informed Metabolite Annotation.",
"description": "This package provides the infrastructure to perform Taxonomically Informed Metabolite Annotation.",
"name": "tima: Taxonomically Informed Metabolite Annotation",
"relatedLink": "https://taxonomicallyinformedannotation.github.io/tima",
"codeRepository": "https://github.com/taxonomicallyinformedannotation/tima",
Expand Down Expand Up @@ -644,7 +644,7 @@
"SystemRequirements": null
},
"keywords": ["metaboliteannotation", "chemotaxonomy", "scoringsystem", "naturalproducts", "computationalmetabolomics", "taxonomicdistance", "specializedmetabolome"],
"fileSize": "3166.651KB",
"fileSize": "3177.113KB",
"citation": [
{
"@type": "ScholarlyArticle",
Expand Down
Loading

0 comments on commit 7de33e5

Please sign in to comment.