From 35ccf58f7cc128e2eae26200dfdbf46b46c6b412 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Mon, 15 Jan 2024 11:12:58 +0100 Subject: [PATCH] feat(docs): list valid nucleotide and amino acid symbols #573 --- lapis2-docs/astro.config.mjs | 8 ++- .../docs/concepts/ambiguous-symbols.mdx | 23 +------- .../references/reference-genomes.mdx | 8 +-- .../nucleotide-and-amino-acid-symbols.mdx | 57 +++++++++++++++++++ ...rence-genome.mdx => reference-genomes.mdx} | 2 +- lapis2-docs/tests/docs.spec.ts | 3 +- 6 files changed, 72 insertions(+), 29 deletions(-) create mode 100644 lapis2-docs/src/content/docs/references/nucleotide-and-amino-acid-symbols.mdx rename lapis2-docs/src/content/docs/references/{reference-genome.mdx => reference-genomes.mdx} (88%) diff --git a/lapis2-docs/astro.config.mjs b/lapis2-docs/astro.config.mjs index 29e84b5f..c7b2ab75 100644 --- a/lapis2-docs/astro.config.mjs +++ b/lapis2-docs/astro.config.mjs @@ -51,8 +51,12 @@ export default defineConfig({ link: '/references/open-api-definition/', }, { - label: 'Reference Genome', - link: '/references/reference-genome/', + label: 'Reference Genomes', + link: '/references/reference-genomes/', + }, + { + label: 'Nucleotide And Amino Acid Symbols', + link: '/references/nucleotide-and-amino-acid-symbols/', }, { label: 'Database Config', diff --git a/lapis2-docs/src/content/docs/concepts/ambiguous-symbols.mdx b/lapis2-docs/src/content/docs/concepts/ambiguous-symbols.mdx index bf2853f6..0f4e172a 100644 --- a/lapis2-docs/src/content/docs/concepts/ambiguous-symbols.mdx +++ b/lapis2-docs/src/content/docs/concepts/ambiguous-symbols.mdx @@ -3,27 +3,8 @@ title: Ambiguous symbols description: Explanation how ambiguous reads are handled in the data --- -The underlying sequence files in `.FASTA` format can contain any of the following symbols: - -| Symbol | Meaning | -| ------ | ----------------- | -| A | Adenine | -| C | Cytosine | -| G | Guanine | -| T | Thymine | -| - | Deletion | -| N | failed read / any | 
-| R | A or G | -| Y | C or T | -| S | C or G | -| W | A or T | -| K | G or T | -| M | A or C | -| B | not A | -| D | not C | -| H | not G | -| V | not T | - +[The symbols page](/references/nucleotide-and-amino-acid-symbols) +lists all symbols that the underlying sequence files in `.FASTA` format can contain. The ambiguous symbols arise from imperfect reads in the sequencer. While one mostly queries for the symbols `A`, `C`, `G`, `T` and `-` to look for specific features and mutations of a sequence, diff --git a/lapis2-docs/src/content/docs/maintainer-docs/references/reference-genomes.mdx b/lapis2-docs/src/content/docs/maintainer-docs/references/reference-genomes.mdx index ba36e2ab..85074b04 100644 --- a/lapis2-docs/src/content/docs/maintainer-docs/references/reference-genomes.mdx +++ b/lapis2-docs/src/content/docs/maintainer-docs/references/reference-genomes.mdx @@ -39,7 +39,7 @@ The file must contain a JSON object with two keys: A reference sequence is a JSON object that permits the following keys: -| Key | Type | Required | Description | -| -------- | ------ | -------- | ----------------------------------------- | -| name | string | true | The name of the sequence. Must be unique. | -| sequence | string | true | The sequence. | +| Key | Type | Required | Description | +| -------- | ------ | -------- | ----------------------------------------------------------------------------------------------- | +| name | string | true | The name of the sequence. Must be unique. | +| sequence | string | true | The sequence. See [here for allowed characters](/references/nucleotide-and-amino-acid-symbols). 
| diff --git a/lapis2-docs/src/content/docs/references/nucleotide-and-amino-acid-symbols.mdx b/lapis2-docs/src/content/docs/references/nucleotide-and-amino-acid-symbols.mdx new file mode 100644 index 00000000..81c056d7 --- /dev/null +++ b/lapis2-docs/src/content/docs/references/nucleotide-and-amino-acid-symbols.mdx @@ -0,0 +1,57 @@ +--- +title: Nucleotide And Amino Acid Symbols +description: A reference list of valid symbols +--- + +This page lists valid symbols for nucleotides and amino acids. + +## Nucleotides + +| Symbol | Meaning | Ambiguous Symbol | +| ------ | ----------------- | ---------------- | +| A | Adenine | | +| C | Cytosine | | +| G | Guanine | | +| T | Thymine | | +| - | Deletion | | +| N | failed read / any | | +| R | A or G | ✔ | +| Y | C or T | ✔ | +| S | C or G | ✔ | +| W | A or T | ✔ | +| K | G or T | ✔ | +| M | A or C | ✔ | +| B | not A | ✔ | +| D | not C | ✔ | +| H | not G | ✔ | +| V | not T | ✔ | + +## Amino Acids + +| Symbol | Meaning | Ambiguous Symbol | +| ------ | --------------------------- | ---------------- | +| - | Deletion | | +| A | Alanine | | +| C | Cysteine | | +| D | Aspartic Acid | | +| E | Glutamic Acid | | +| F | Phenylalanine | | +| G | Glycine | | +| H | Histidine | | +| I | Isoleucine | | +| K | Lysine | | +| L | Leucine | | +| M | Methionine | | +| N | Asparagine | | +| P | Proline | | +| Q | Glutamine | | +| R | Arginine | | +| S | Serine | | +| T | Threonine | | +| V | Valine | | +| W | Tryptophan | | +| Y | Tyrosine | | +| \* | Stop codon | | +| B | Aspartic Acid or Asparagine | ✔ | +| Z | Glutamine or Glutamic Acid | ✔ | +| X | Any amino acid | ✔ | diff --git a/lapis2-docs/src/content/docs/references/reference-genome.mdx b/lapis2-docs/src/content/docs/references/reference-genomes.mdx similarity index 88% rename from lapis2-docs/src/content/docs/references/reference-genome.mdx rename to lapis2-docs/src/content/docs/references/reference-genomes.mdx index ef37ead6..e668f5ba 100644 ---
a/lapis2-docs/src/content/docs/references/reference-genome.mdx +++ b/lapis2-docs/src/content/docs/references/reference-genomes.mdx @@ -1,5 +1,5 @@ --- -title: Reference Genome +title: Reference Genomes description: reference genome --- diff --git a/lapis2-docs/tests/docs.spec.ts b/lapis2-docs/tests/docs.spec.ts index 86017018..4a3d84fa 100644 --- a/lapis2-docs/tests/docs.spec.ts +++ b/lapis2-docs/tests/docs.spec.ts @@ -8,7 +8,8 @@ const referencesPages = [ 'Fields', 'Filters', 'Open API / Swagger', - 'Reference Genome', + 'Reference Genomes', + 'Nucleotide And Amino Acid Symbols', 'Database Config', ];