Merge branch 'main' into dev

taxonomicallyinformedannotation · Aug 7, 2024 · 7de33e5 · 7de33e5
2 parents 2933649 + d7b203f
commit 7de33e5
Show file tree

Hide file tree

Showing 14 changed files with 275 additions and 18 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Authors@R: c(
  comment = c(ORCID = "0000-0003-3389-2191"))
  )
 Maintainer: Adriano Rutz <rutz@imsb.biol.ethz.ch>
-Description: TIMA provides the infrastructure to perform Taxonomically
+Description: This package provides the infrastructure to perform Taxonomically
  Informed Metabolite Annotation.
 License: GPL (>= 3)
 URL: https://github.com/taxonomicallyinformedannotation/tima,

diff --git a/NAMESPACE b/NAMESPACE
@@ -199,6 +199,7 @@ importFrom(tidytable,unnest)
 importFrom(tidytable,where)
 importFrom(utils,URLencode)
 importFrom(utils,combn)
+importFrom(utils,globalVariables)
 importFrom(utils,unzip)
 importFrom(yaml,read_yaml)
 importFrom(yaml,write_yaml)
diff --git a/R/clean_bio.R b/R/clean_bio.R
@@ -71,7 +71,7 @@ clean_bio <-
  candidate_structure_tax_cla_03cla,
  candidate_structure_tax_npc_03cla,
  candidate_structure_tax_cla_04dirpar,
- score_pondered_bio,
+ score_weighted_bio,
  .keep_all = TRUE
  )
 
@@ -95,7 +95,7 @@ clean_bio <-
  candidate_structure_tax_cla_03cla,
  candidate_structure_tax_npc_03cla,
  candidate_structure_tax_cla_04dirpar,
- score_pondered_bio
+ score_weighted_bio
  ),
  by = setNames("feature_id", "feature_target")
  ) |>
@@ -112,7 +112,7 @@ clean_bio <-
  distinct(
  feature_source,
  feature_target, !!as.name(candidates),
- score_pondered_bio
+ score_weighted_bio
  ) |>
  mutate(
  count = n_distinct(feature_target),
@@ -126,7 +126,7 @@ clean_bio <-
  distinct(feature_source, !!as.name(candidates), .keep_all = TRUE) |>
  mutate(
  !!as.name(feature_score_name) :=
- !!as.name(consistency_name) * score_pondered_bio,
+ !!as.name(consistency_name) * score_weighted_bio,
  .by = c(feature_source, !!as.name(candidates))
  ) |>
  arrange(-!!as.name(feature_score_name)) |>

diff --git a/R/clean_chemo.R b/R/clean_chemo.R
@@ -130,14 +130,14 @@ clean_chemo <-
  }
 
  df1 <- df1 |>
- arrange(desc(score_pondered_chemo)) |>
+ arrange(desc(score_weighted_chemo)) |>
  distinct(feature_id,
  candidate_structure_inchikey_no_stereo,
  .keep_all = TRUE
  ) |>
  mutate(
  rank_initial = dense_rank(-candidate_score_pseudo_initial),
- rank_final = dense_rank(-score_pondered_chemo),
+ rank_final = dense_rank(-score_weighted_chemo),
  .by = c(feature_id)
  ) |>
  filter(rank_final <= candidates_final)
@@ -176,9 +176,9 @@ clean_chemo <-
  model$rank_columns,
  "score_initial" = "candidate_score_pseudo_initial",
  "score_biological",
- "score_interim" = "score_pondered_bio",
+ "score_interim" = "score_weighted_bio",
  "score_chemical",
- "score_final" = "score_pondered_chemo"
+ "score_final" = "score_weighted_chemo"
  )
  )) |>
  distinct() |>

diff --git a/R/globals.R b/R/globals.R
@@ -1 +1,252 @@
-utils::globalVariables(c(":=", "!!"))
+import::from(utils, globalVariables, .into = environment())
+# Names referenced unquoted in package code; registered for R CMD check.
+#' @importFrom utils globalVariables
+globalVariables(
+ c(
+ ":=",
+ "!!",
+ "adduct",
+ "adduct.x",
+ "adduct.y",
+ "adduct_dest",
+ "candidate_adduct",
+ "candidate_count_similarity_peaks_matched",
+ "candidate_library",
+ "candidate_organism_01_domain",
+ "candidate_organism_02_kingdom",
+ "candidate_organism_03_phylum",
+ "candidate_organism_04_class",
+ "candidate_organism_05_order",
+ "candidate_organism_06_family",
+ "candidate_organism_07_tribe",
+ "candidate_organism_08_genus",
+ "candidate_organism_09_species",
+ "candidate_organism_10_varietas",
+ "candidate_score_pseudo_initial",
+ "candidate_score_similarity",
+ "candidate_score_sirius_csi",
+ "candidate_score_sirius_csi_tmp",
+ "candidate_spectrum_entropy",
+ "candidate_structure_error_mz",
+ "candidate_structure_error_rt",
+ "candidate_structure_exact_mass",
+ "candidate_structure_exact_mass_i",
+ "candidate_structure_exact_mass_s",
+ "candidate_structure_inchi",
+ "candidate_structure_inchikey_no_stereo",
+ "candidate_structure_inchikey_no_stereo_i",
+ "candidate_structure_inchikey_no_stereo_s",
+ "candidate_structure_molecular_formula",
+ "candidate_structure_molecular_formula_i",
+ "candidate_structure_molecular_formula_s",
+ "candidate_structure_name",
+ "candidate_structure_name_i",
+ "candidate_structure_name_s",
+ "candidate_structure_organism_occurrence_reference",
+ "candidate_structure_smiles_no_stereo",
+ "candidate_structure_smiles_no_stereo_i",
+ "candidate_structure_smiles_no_stereo_s",
+ "candidate_structure_tax_cla_chemontid",
+ "candidate_structure_tax_cla_chemontid_i",
+ "candidate_structure_tax_cla_chemontid_s",
+ "candidate_structure_tax_cla_01kin",
+ "candidate_structure_tax_cla_01kin_i",
+ "candidate_structure_tax_cla_01kin_s",
+ "candidate_structure_tax_cla_02sup",
+ "candidate_structure_tax_cla_02sup_i",
+ "candidate_structure_tax_cla_02sup_s",
+ "candidate_structure_tax_cla_03cla",
+ "candidate_structure_tax_cla_03cla_i",
+ "candidate_structure_tax_cla_03cla_s",
+ "candidate_structure_tax_cla_04dirpar",
+ "candidate_structure_tax_cla_04dirpar_i",
+ "candidate_structure_tax_cla_04dirpar_s",
+ "candidate_structure_tax_npc_01pat",
+ "candidate_structure_tax_npc_01pat_i",
+ "candidate_structure_tax_npc_01pat_s",
+ "candidate_structure_tax_npc_02sup",
+ "candidate_structure_tax_npc_02sup_i",
+ "candidate_structure_tax_npc_02sup_s",
+ "candidate_structure_tax_npc_03cla",
+ "candidate_structure_tax_npc_03cla_i",
+ "candidate_structure_tax_npc_03cla_s",
+ "candidate_structure_xlogp",
+ "candidate_structure_xlogp_i",
+ "candidate_structure_xlogp_s",
+ "canonical_name",
+ "cluster",
+ "cluster index",
+ "ComponentIndex",
+ "componentindex",
+ "compound_id",
+ "consistency_structure_cla_cla",
+ "consistency_structure_cla_kin",
+ "consistency_structure_cla_par",
+ "consistency_structure_cla_sup",
+ "consistency_structure_npc_cla",
+ "consistency_structure_npc_pat",
+ "consistency_structure_npc_sup",
+ "delta_max",
+ "delta_min",
+ "Distance",
+ "error_mz",
+ "exactmass",
+ "exact_mass",
+ "feature_id",
+ "feature_id.x",
+ "feature_id.y",
+ "feature_id_dest",
+ "feature_pred_tax_cla_01kin_val",
+ "feature_pred_tax_cla_01kin_score",
+ "feature_pred_tax_cla_02sup_val",
+ "feature_pred_tax_cla_02sup_score",
+ "feature_pred_tax_cla_03cla_val",
+ "feature_pred_tax_cla_03cla_score",
+ "feature_pred_tax_cla_04dirpar_val",
+ "feature_pred_tax_cla_04dirpar_score",
+ "feature_pred_tax_npc_01pat_val",
+ "feature_pred_tax_npc_01pat_score",
+ "feature_pred_tax_npc_02sup_val",
+ "feature_pred_tax_npc_02sup_score",
+ "feature_pred_tax_npc_03cla_val",
+ "feature_pred_tax_npc_03cla_score",
+ "feature_source",
+ "feature_spectrum_entropy",
+ "feature_spectrum_peaks",
+ "feature_target",
+ "filename",
+ "formula",
+ "Group1",
+ "Group2",
+ "id",
+ "inchikey",
+ "inchikey_no_stereo",
+ "intensity",
+ "ionMass",
+ "Item1",
+ "Item2",
+ "l",
+ "label",
+ "Label",
+ "library_name",
+ "loss",
+ "mappingFeatureId",
+ "mass",
+ "mass_max",
+ "mass_min",
+ "massErrorPrecursor(ppm)",
+ "moldb_formula",
+ "moldb_inchikey",
+ "moldb_logp",
+ "moldb_mono_mass",
+ "moldb_smiles",
+ "mz",
+ "mz.x",
+ "mz.y",
+ "mz_dest",
+ "MZErrorPPM",
+ "n",
+ "name",
+ "on",
+ "organism",
+ "organism_name",
+ "organism_taxonomy_01domain",
+ "organism_taxonomy_02kingdom",
+ "organism_taxonomy_03phylum",
+ "organism_taxonomy_04class",
+ "organism_taxonomy_05order",
+ "organism_taxonomy_06family",
+ "organism_taxonomy_07tribe",
+ "organism_taxonomy_08genus",
+ "organism_taxonomy_09species",
+ "organism_taxonomy_10varietas",
+ "organism_taxonomy_ottid",
+ "ott_id",
+ "output",
+ "precursorMz",
+ "Precursor_MZ",
+ "rank_final",
+ "reference_doi",
+ "rowname",
+ "rt",
+ "rt_max",
+ "rt_min",
+ "rt_target",
+ "rt.x",
+ "sample_organism_01_domain",
+ "sample_organism_02_kingdom",
+ "sample_organism_03_phylum",
+ "sample_organism_04_class",
+ "sample_organism_05_order",
+ "sample_organism_06_family",
+ "sample_organism_07_tribe",
+ "sample_organism_08_genus",
+ "sample_organism_09_species",
+ "sample_organism_10_varietas",
+ "score_biological",
+ "score_biological_01",
+ "score_biological_02",
+ "score_biological_03",
+ "score_biological_04",
+ "score_biological_05",
+ "score_biological_06",
+ "score_biological_07",
+ "score_biological_08",
+ "score_biological_09",
+ "score_biological_10",
+ "score_chemical",
+ "score_chemical_1",
+ "score_chemical_2",
+ "score_chemical_3",
+ "score_chemical_4",
+ "score_chemical_5",
+ "score_chemical_6",
+ "score_chemical_7",
+ "score_weighted_bio",
+ "score_weighted_chemo",
+ "search_string",
+ "selection",
+ "smiles",
+ "smiles_no_stereo",
+ "spectrum_id",
+ "structure_exact_mass",
+ "structure_inchikey",
+ "structure_inchikey_2D",
+ "structure_inchikey_no_stereo",
+ "structure_molecular_formula",
+ "structure_name",
+ "structure_nameTraditional",
+ "structure_smiles",
+ "structure_smiles_2D",
+ "structure_smiles_no_stereo",
+ "structure_tax_cla_01kin",
+ "structure_tax_cla_02sup",
+ "structure_tax_cla_03cla",
+ "structure_tax_cla_04dirpar",
+ "structure_tax_cla_chemontid",
+ "structure_tax_npc_01pat",
+ "structure_tax_npc_02sup",
+ "structure_tax_npc_03cla",
+ "structure_taxonomy_classyfire_chemontid",
+ "structure_taxonomy_classyfire_01kingdom",
+ "structure_taxonomy_classyfire_02superclass",
+ "structure_taxonomy_classyfire_03class",
+ "structure_taxonomy_classyfire_04directparent",
+ "structure_taxonomy_npclassifier_01pathway",
+ "structure_taxonomy_npclassifier_02superclass",
+ "structure_taxonomy_npclassifier_03class",
+ "structure_xlogp",
+ "target_id",
+ "target_inchikey",
+ "target_inchikey_no_stereo",
+ "target_precursorMz",
+ "target_smiles",
+ "target_smiles_no_stereo",
+ "type",
+ "unique_name",
+ "val",
+ "value",
+ "value_max",
+ "value_min"
+ )
+)
diff --git a/R/go_to_cache.R b/R/go_to_cache.R
@@ -9,6 +9,8 @@ import::from(fs, path_home, .into = environment())
 #' @importFrom fs dir_create
 #' @importFrom fs path_home
 #'
+#' @param dir Directory
+#'
 #' @return Goes to cache
 #'
 #' @export

diff --git a/R/weight_bio.R b/R/weight_bio.R
@@ -457,7 +457,7 @@ weight_bio <-
  ) |>
  select(-candidate_score_sirius_csi_tmp) |>
  mutate(
- score_pondered_bio = (1 / (weight_biological + weight_spectral)) * weight_biological * score_biological + (1 / (weight_biological + weight_spectral)) * weight_spectral * candidate_score_pseudo_initial
+ score_weighted_bio = (1 / (weight_biological + weight_spectral)) * weight_biological * score_biological + (1 / (weight_biological + weight_spectral)) * weight_spectral * candidate_score_pseudo_initial
  )
 
  rm(

diff --git a/R/weight_chemo.R b/R/weight_chemo.R
@@ -194,7 +194,7 @@ weight_chemo <-
  right_join(annot_table_wei_bio_clean) |>
  log_pipe("... calculating weighted chemical score \n") |>
  mutate(
- score_pondered_chemo = (1 / (
+ score_weighted_chemo = (1 / (
  weight_chemical + weight_biological + weight_spectral
  )) * weight_chemical * score_chemical + (1 / (
  weight_chemical + weight_biological + weight_spectral

diff --git a/README.Rmd b/README.Rmd
@@ -21,7 +21,7 @@ knitr::opts_chunk$set(
 [![R-CMD-check](https://github.com/taxonomicallyinformedannotation/tima/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/taxonomicallyinformedannotation/tima/actions/workflows/R-CMD-check.yaml)
 [![R-Universe](https://taxonomicallyinformedannotation.r-universe.dev/badges/tima)](https://taxonomicallyinformedannotation.r-universe.dev/tima)
 [![Codecov test coverage](https://codecov.io/gh/taxonomicallyinformedannotation/tima/graph/badge.svg)](https://app.codecov.io/gh/taxonomicallyinformedannotation/tima)
-[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima/)
+[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima-r/)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5797920.svg)](https://doi.org/10.5281/zenodo.5797920)
 <!-- badges: end -->
 

diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ status](https://www.r-pkg.org/badges/version/tima)](https://CRAN.R-project.org/p
 [![R-Universe](https://taxonomicallyinformedannotation.r-universe.dev/badges/tima)](https://taxonomicallyinformedannotation.r-universe.dev/tima)
 [![Codecov test
 coverage](https://codecov.io/gh/taxonomicallyinformedannotation/tima/graph/badge.svg)](https://app.codecov.io/gh/taxonomicallyinformedannotation/tima)
-[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima/)
+[![Docker](https://badgen.net/badge/icon/docker?icon=docker&label)](https://hub.docker.com/r/adafede/tima-r/)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5797920.svg)](https://doi.org/10.5281/zenodo.5797920)
 <!-- badges: end -->
 

diff --git a/codemeta.json b/codemeta.json
@@ -2,7 +2,7 @@
  "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
  "@type": "SoftwareSourceCode",
  "identifier": "tima",
- "description": "TIMA provides the infrastructure to perform Taxonomically Informed Metabolite Annotation.",
+ "description": "This package provides the infrastructure to perform Taxonomically Informed Metabolite Annotation.",
  "name": "tima: Taxonomically Informed Metabolite Annotation",
  "relatedLink": "https://taxonomicallyinformedannotation.github.io/tima",
  "codeRepository": "https://github.com/taxonomicallyinformedannotation/tima",
@@ -644,7 +644,7 @@
  "SystemRequirements": null
  },
  "keywords": ["metaboliteannotation", "chemotaxonomy", "scoringsystem", "naturalproducts", "computationalmetabolomics", "taxonomicdistance", "specializedmetabolome"],
- "fileSize": "3166.651KB",
+ "fileSize": "3177.113KB",
  "citation": [
  {
  "@type": "ScholarlyArticle",