Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use Auspice JSON as a dataset #1455

Merged
merged 20 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
0ec7adc
feat: add ref and annotation data to Auspice tree types
ivan-aksamentov May 13, 2024
1043b98
refactor: add pathogen nextclade extension to auspice tree type
ivan-aksamentov May 16, 2024
4334f32
feat: use Auspice JSON as dataset
ivan-aksamentov May 16, 2024
b843ada
fix: parsing auspice genome annotations
ivan-aksamentov May 16, 2024
ff7e887
fix: off-by-one in landmark range
ivan-aksamentov May 17, 2024
9b952bf
fix: duplicated start and end fields in the annotation of output tree
ivan-aksamentov May 17, 2024
48d163c
feat: accept Auspice JSON genome annotation in read-annotation command
ivan-aksamentov May 17, 2024
1fc4936
refactor: aggregate inputs loading
ivan-aksamentov May 23, 2024
a27ee66
feat(web): add url parameter `dataset-json-url`
ivan-aksamentov May 23, 2024
fb029d5
Merge remote-tracking branch 'origin/master' into feat/ref-and-ann-fr…
ivan-aksamentov May 23, 2024
b1b3f5f
fix(web): prevent crash when an auspice dataset was used in prev session
ivan-aksamentov May 23, 2024
e5ee068
fix(web): prevent crash when auspice json has no `.root_sequence`
ivan-aksamentov May 23, 2024
883a0d6
refactor: lint
ivan-aksamentov May 23, 2024
9f3c1e0
fix(web): specifically accept json
ivan-aksamentov May 24, 2024
fc7b8bd
fix(web): hide "Load examples" button when examples are not in dataset
ivan-aksamentov May 24, 2024
a3c120b
Merge remote-tracking branch 'origin/master' into feat/ref-and-ann-fr…
ivan-aksamentov May 24, 2024
ddd9925
fix: make dataset files optional
ivan-aksamentov May 24, 2024
fe260c6
feat: allow to override dataset components when Auspice dataset
ivan-aksamentov May 24, 2024
82e69a1
fix(web): don't error when ref missing from auspice json but is provi…
ivan-aksamentov May 24, 2024
44fb8a5
feat(web): take title, description and update date from Auspice JSON
ivan-aksamentov May 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 74 additions & 60 deletions packages/nextclade-cli/src/dataset/dataset_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@ use color_eyre::{Section, SectionExt};
use eyre::{eyre, ContextCompat, Report, WrapErr};
use itertools::Itertools;
use log::{warn, LevelFilter};
use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties};
use nextclade::analyze::virus_properties::VirusProperties;
use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
use nextclade::io::dataset::{Dataset, DatasetFiles, DatasetMeta, DatasetsIndexJson};
use nextclade::io::dataset::{Dataset, DatasetsIndexJson};
use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
use nextclade::io::file::create_file_or_stdout;
use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
use nextclade::run::nextclade_wasm::NextcladeParams;
use nextclade::run::nextclade_wasm::{NextcladeParams, NextcladeParamsOptional};
use nextclade::tree::tree::AuspiceTree;
use nextclade::utils::fs::list_files_recursive;
use nextclade::utils::option::OptionMapRefFallible;
use nextclade::utils::string::{format_list, surround_with_quotes, Indent};
use nextclade::{make_error, make_internal_error, o};
use std::collections::{BTreeMap, BTreeSet};
use std::collections::BTreeSet;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, Write};
use std::ops::Deref;
Expand All @@ -35,13 +35,16 @@ pub fn nextclade_get_inputs(
if input_dataset.is_file() && has_extension(input_dataset, "zip") {
dataset_zip_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else if input_dataset.is_file() && has_extension(input_dataset, "json") {
dataset_json_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else if input_dataset.is_dir() {
dataset_dir_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else {
make_error!(
"--input-dataset: path is invalid. \
Expected a directory path or a zip archive file path, but got: '{input_dataset:#?}'"
Expected a directory path, a zip file path or json file path, but got: '{input_dataset:#?}'"
)
}
} else {
Expand Down Expand Up @@ -119,14 +122,10 @@ pub fn dataset_zip_load(
.wrap_err("When reading pathogen JSON from dataset")?
.ok_or_else(|| eyre!("Pathogen JSON must always be present in the dataset but not found."))?;

let ref_record = read_from_path_or_zip(
&run_args.inputs.input_ref,
&mut zip,
&Some(&virus_properties.files.reference),
)?
.map_ref_fallible(read_one_fasta_str)
.wrap_err("When reading reference sequence from dataset")?
.ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?;
let ref_record = read_from_path_or_zip(&run_args.inputs.input_ref, &mut zip, &virus_properties.files.reference)?
.map_ref_fallible(read_one_fasta_str)
.wrap_err("When reading reference sequence from dataset")?
.ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?;

let gene_map = read_from_path_or_zip(
&run_args.inputs.input_annotation,
Expand Down Expand Up @@ -157,8 +156,8 @@ fn verify_dataset_files<'a, T: AsRef<str> + 'a + ?Sized>(
files_present: impl Iterator<Item = &'a T> + 'a,
) {
let declared: BTreeSet<&str> = [
Some(virus_properties.files.reference.as_str()),
Some(virus_properties.files.pathogen_json.as_str()),
virus_properties.files.reference.as_deref(),
virus_properties.files.pathogen_json.as_deref(),
virus_properties.files.genome_annotation.as_deref(),
virus_properties.files.tree_json.as_deref(),
virus_properties.files.examples.as_deref(),
Expand Down Expand Up @@ -238,8 +237,17 @@ pub fn dataset_dir_load(
let virus_properties = VirusProperties::from_path(input_pathogen_json)?;

let input_ref = input_ref
.clone()
.unwrap_or_else(|| dataset_dir.join(&virus_properties.files.reference));
.as_ref()
.cloned()
.or_else(|| {
virus_properties
.files
.reference
.as_ref()
.map(|reference| dataset_dir.join(reference))
})
.expect("Reference sequence is required but it is neither declared in the dataset's pathogen.json `.files` section, nor provided as a separate file");

let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;

let gene_map = input_annotation
Expand Down Expand Up @@ -283,6 +291,51 @@ pub fn dataset_dir_load(
})
}

/// Builds Nextclade run parameters from a single Auspice JSON v2 file used as a dataset.
///
/// The Auspice JSON at `dataset_json` is parsed first. Then each of the pathogen JSON,
/// reference sequence, reference tree and genome annotation inputs found in `run_args.inputs`
/// is read from its own file and collected into a `NextcladeParamsOptional`, which is handed
/// to `NextcladeParams::from_auspice` together with the parsed Auspice JSON and `cdses`.
///
/// # Errors
///
/// Returns an error when the Auspice JSON or any of the separately provided input files
/// cannot be read or parsed, or when `NextcladeParams::from_auspice` fails.
pub fn dataset_json_load(
  run_args: &NextcladeRunArgs,
  dataset_json: impl AsRef<Path>,
  cdses: &Option<Vec<String>>,
) -> Result<NextcladeParams, Report> {
  let inputs = &run_args.inputs;

  let auspice_json = AuspiceTree::from_path(dataset_json.as_ref()).wrap_err("When reading Auspice JSON v2")?;

  // Individual input files act as overrides on top of what the Auspice JSON provides.
  // NOTE(review): presumably an input that was not given on the command line stays `None` — confirm
  // against `OptionMapRefFallible`.
  let virus_properties = inputs
    .input_pathogen_json
    .map_ref_fallible(VirusProperties::from_path)
    .wrap_err("When parsing pathogen JSON")?;

  let ref_record = inputs
    .input_ref
    .map_ref_fallible(read_one_fasta)
    .wrap_err("When parsing reference sequence")?;

  let tree = inputs
    .input_tree
    .map_ref_fallible(AuspiceTree::from_path)
    .wrap_err("When parsing reference tree Auspice JSON v2")?;

  let gene_map = inputs
    .input_annotation
    .map_ref_fallible(GeneMap::from_path)
    .wrap_err("When parsing genome annotation")?;

  let overrides = NextcladeParamsOptional {
    ref_record,
    gene_map,
    tree,
    virus_properties,
  };

  NextcladeParams::from_auspice(&auspice_json, &overrides, cdses)
}

pub fn dataset_individual_files_load(
run_args: &NextcladeRunArgs,
cdses: &Option<Vec<String>>,
Expand All @@ -297,41 +350,7 @@ pub fn dataset_individual_files_load(
.and_then(|input_pathogen_json| read_file_to_string(input_pathogen_json).ok())
.map_ref_fallible(VirusProperties::from_str)
.wrap_err("When reading pathogen JSON")?
.unwrap_or_else(|| {
// The only case where we allow pathogen.json to be missing is when there's no dataset and files are provided
// explicitly through args. Let's create a dummy value to avoid making the field optional,
// and avoid adding `Default` trait.
VirusProperties {
schema_version: "".to_owned(),
attributes: BTreeMap::default(),
shortcuts: vec![],
meta: DatasetMeta::default(),
files: DatasetFiles {
reference: "".to_owned(),
pathogen_json: "".to_owned(),
genome_annotation: None,
tree_json: None,
examples: None,
readme: None,
changelog: None,
rest_files: BTreeMap::default(),
other: serde_json::Value::default(),
},
default_cds: None,
cds_order_preference: vec![],
mut_labels: LabelledMutationsConfig::default(),
qc: None,
general_params: None,
alignment_params: None,
tree_builder_params: None,
phenotype_data: None,
aa_motifs: vec![],
versions: vec![],
version: None,
compatibility: None,
other: serde_json::Value::default(),
}
});
.unwrap_or_default();

let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;

Expand Down Expand Up @@ -401,14 +420,9 @@ pub fn dataset_str_download_and_load(
.wrap_err("When reading pathogen JSON from dataset")?
.ok_or_else(|| eyre!("Required file not found in dataset: 'pathogen.json'. Please report it to dataset authors."))?;

let ref_record = read_from_path_or_url(
&http,
&dataset,
&run_args.inputs.input_ref,
&Some(dataset.files.reference.clone()),
)?
.map_ref_fallible(read_one_fasta_str)?
.wrap_err("When reading reference sequence from dataset")?;
let ref_record = read_from_path_or_url(&http, &dataset, &run_args.inputs.input_ref, &dataset.files.reference)?
.map_ref_fallible(read_one_fasta_str)?
.wrap_err("When reading reference sequence from dataset")?;

let gene_map = read_from_path_or_url(
&http,
Expand Down
3 changes: 1 addition & 2 deletions packages/nextclade-web/src/components/Error/ErrorContent.tsx
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import React, { useCallback, useMemo, useState } from 'react'
import { Button, Col, Row } from 'reactstrap'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl'
import styled from 'styled-components'
import { CopyToClipboard } from 'react-copy-to-clipboard'
import { FaClipboardCheck, FaClipboardList } from 'react-icons/fa'

import { ErrorGeneric } from 'src/components/Error/error-types/ErrorGeneric'
import { ErrorNetworkConnectionFailure } from 'src/components/Error/error-types/ErrorNetworkConnectionFailure'
import { ErrorNetworkRequestFailure } from 'src/components/Error/error-types/ErrorNetworkRequestFailure'
import { NextcladeV2ErrorContent } from 'src/components/Error/error-types/NextcladeV2ErrorContent'
import { ErrorContentExplanation, getErrorReportText } from 'src/components/Error/ErrorContentExplanation'
import { sanitizeError } from 'src/helpers/sanitizeError'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory'
import { HttpRequestError } from 'src/io/axiosFetch'
import { ErrorMessageMonospace } from './ErrorStyles'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import React, { useMemo } from 'react'
import { ErrorContainer, ErrorMessage } from 'src/components/Error/ErrorStyles'
import { LinkExternal } from 'src/components/Link/LinkExternal'
import { PROJECT_NAME, RELEASE_OLD_URL } from 'src/constants'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl'
import urljoin from 'url-join'

export interface Props {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Dataset } from '_SchemaRoot'
import { isEmpty } from 'lodash'
import React, { useCallback } from 'react'
import { Button } from 'reactstrap'
import { useRecoilValue } from 'recoil'
Expand Down Expand Up @@ -44,6 +45,10 @@ export function ButtonLoadExample({ ...rest }) {
setExampleSequences(datasetCurrent)
}, [datasetCurrent, setExampleSequences])

if (isEmpty(datasetCurrent?.files?.examples)) {
return null
}

return (
<Button {...rest} color="link" onClick={onClick} disabled={hasInputErrors || !datasetCurrent}>
{t('Load example')}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ export function DatasetContentSection() {
return (
<ContentSection>
<Nav tabs>
{currentDataset?.files.readme && (
{currentDataset?.files?.readme && (
<TabLabel tabId={0} activeTabId={activeTabId} setActiveTabId={setActiveTabId}>
{'Summary'}
</TabLabel>
)}
{currentDataset?.files.changelog && (
{currentDataset?.files?.changelog && (
<TabLabel tabId={1} activeTabId={activeTabId} setActiveTabId={setActiveTabId}>
{'History'}
</TabLabel>
Expand All @@ -40,10 +40,10 @@ export function DatasetContentSection() {
</Nav>
<TabContent activeTab={activeTabId}>
<TabPane tabId={0}>
{currentDataset?.files.readme && <MarkdownRemote url={currentDataset?.files.readme} />}
{currentDataset?.files?.readme && <MarkdownRemote url={currentDataset?.files.readme} />}
</TabPane>
<TabPane tabId={1}>
{currentDataset?.files.changelog && <MarkdownRemote url={currentDataset?.files.changelog} />}
{currentDataset?.files?.changelog && <MarkdownRemote url={currentDataset?.files.changelog} />}
</TabPane>
<TabPane tabId={2}>{currentDataset && <DatasetContentTabAdvanced />}</TabPane>
</TabContent>
Expand Down
2 changes: 1 addition & 1 deletion packages/nextclade-web/src/components/Main/DatasetInfo.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ export function DatasetInfo({ dataset, showSuggestions, ...restProps }: DatasetI
if (version?.tag === 'unreleased') {
updatedAt = `${updatedAt} (${t('unreleased')})`
}
return updatedAt
return updatedAt ?? t('unknown')
}, [t, version?.tag, version?.updatedAt])

const datasetName = attrStrMaybe(attributes, 'name') ?? path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export function ExampleSequencePicker({ ...restProps }: LanguageSwitcherProps) {
const { datasets: allDatasets } = useRecoilValue(datasetsAtom)

const filtered = useMemo(() => {
const datasets = allDatasets.filter((dataset) => !isNil(dataset.files.examples))
const datasets = allDatasets.filter((dataset) => !isNil(dataset?.files?.examples))
if (searchTerm.trim().length === 0) {
return datasets
}
Expand Down
8 changes: 7 additions & 1 deletion packages/nextclade-web/src/helpers/formatDate.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import { isEmpty } from 'lodash'
import { DateTime } from 'luxon'
import { notUndefinedOrNull } from 'src/helpers/notUndefined'

/**
 * Formats an ISO date-time string, interpreted in the UTC zone, as a short human-readable string.
 *
 * The result consists of the ISO date, the ISO time (without milliseconds and without offset)
 * and the zone name in parentheses, joined by single spaces. Parts that are missing or empty
 * are left out, so that no stray separators appear in the output.
 *
 * @param dateTimeStr date-time string in ISO format
 * @returns the formatted string, or `undefined` when no ISO date can be produced from the input
 *   (presumably when the input cannot be parsed as an ISO date-time)
 */
export function formatDateIsoUtcSimple(dateTimeStr: string) {
  const utc = DateTime.fromISO(dateTimeStr, { zone: 'UTC' })

  const date = utc.toISODate()

  // Without a date there is nothing meaningful to display
  if (isEmpty(date)) {
    return undefined
  }

  const time = utc.toISOTime({
    suppressMilliseconds: true,
    suppressSeconds: true,
    includeOffset: false,
  })

  // Keep only the non-empty parts. Filtering with bare `isEmpty` would do the opposite:
  // retain only the empty parts and always produce an empty string.
  return [date, time, `(${utc.zoneName})`]
    .filter(notUndefinedOrNull)
    .filter((part) => !isEmpty(part))
    .join(' ')
}
Loading