diff --git a/lapis-docs/src/content/docs/concepts/response-format.mdx b/lapis-docs/src/content/docs/concepts/response-format.mdx index 63e3c8a2..50f469b6 100644 --- a/lapis-docs/src/content/docs/concepts/response-format.mdx +++ b/lapis-docs/src/content/docs/concepts/response-format.mdx @@ -11,7 +11,14 @@ A '200' response code indicates a successful request, while any other code signi Endpoints typically support returning data in JSON, [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) or [TSV](https://en.wikipedia.org/wiki/Tab-separated_values), with JSON being the default. Genomic sequences (such as those from `unalignedNucleotideSequences`, `alignedAminoAcidSequences`, etc.) are provided in the -[FASTA format](https://en.wikipedia.org/wiki/FASTA_format). +[FASTA format](https://en.wikipedia.org/wiki/FASTA_format) by default, +but can also be requested in JSON or [NDJSON](https://github.com/ndjson/ndjson-spec) format. + +:::note +NDJSON is particularly useful for downloading large datasets, as it allows for streaming data processing. +You can read the data line by line without loading the entire file into memory. +Since every line is a valid JSON object, it is usually easier to handle programmatically than FASTA. 
+::: ## Example diff --git a/lapis-e2e/test/alignedNucleotideSequence.spec.ts b/lapis-e2e/test/alignedNucleotideSequence.spec.ts index 9595d6f9..bf7d4b39 100644 --- a/lapis-e2e/test/alignedNucleotideSequence.spec.ts +++ b/lapis-e2e/test/alignedNucleotideSequence.spec.ts @@ -10,55 +10,49 @@ import { describe('The /alignedNucleotideSequence endpoint', () => { it('should return aligned nucleotide sequences for Switzerland', async () => { const result = await lapisSingleSegmentedSequenceController.postAlignedNucleotideSequence({ - nucleotideSequenceRequest: { country: 'Switzerland' }, + nucleotideSequenceRequest: { country: 'Switzerland', dataFormat: 'JSON' }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_3259931'); - expect(sequences[0]).to.have.length(29903); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_3259931'); + expect(result[0].main).to.have.length(29903); }); it('should return aligned nucleotide sequences for multi segmented sequences', async () => { const result = await lapisMultiSegmentedSequenceController.postAlignedNucleotideSequence({ - nucleotideSequenceRequest: { country: 'Switzerland' }, + nucleotideSequenceRequest: { country: 'Switzerland', dataFormat: 'JSON' }, segment: 'M', }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(6); - expect(sequences).to.have.length(6); - expect(primaryKeys[0]).to.equal('>key_5'); - expect(sequences[0]).to.equal('TGGG'); + expect(result).to.have.length(6); + expect(result[0].primaryKey).to.equal('key_5'); + expect(result[0].m).to.equal('TGGG'); }); it('should order ascending by specified fields', async () => { const result = await lapisSingleSegmentedSequenceController.postAlignedNucleotideSequence({ - nucleotideSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'ascending' }] }, + 
nucleotideSequenceRequest: { + orderBy: [{ field: 'primaryKey', type: 'ascending' }], + dataFormat: 'JSON', + }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_1001493'); - expect(sequences[0]).to.have.length(29903); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_1001493'); + expect(result[0].main).to.have.length(29903); }); it('should order descending by specified fields', async () => { const result = await lapisSingleSegmentedSequenceController.postAlignedNucleotideSequence({ - nucleotideSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'descending' }] }, + nucleotideSequenceRequest: { + orderBy: [{ field: 'primaryKey', type: 'descending' }], + dataFormat: 'JSON', + }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_931279'); - expect(sequences[0]).to.have.length(29903); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_931279'); + expect(result[0].main).to.have.length(29903); }); it('should apply limit and offset', async () => { @@ -66,16 +60,13 @@ describe('The /alignedNucleotideSequence endpoint', () => { nucleotideSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'ascending' }], limit: 2, + dataFormat: 'JSON', }, }); - const { primaryKeys: primaryKeysWithLimit, sequences: sequencesWithLimit } = - sequenceData(resultWithLimit); - - expect(primaryKeysWithLimit).to.have.length(2); - expect(sequencesWithLimit).to.have.length(2); - expect(primaryKeysWithLimit[0]).to.equal('>key_1001493'); - expect(sequencesWithLimit[0]).to.have.length(29903); + expect(resultWithLimit).to.have.length(2); + expect(resultWithLimit[0].primaryKey).to.equal('key_1001493'); + 
expect(resultWithLimit[0].main).to.have.length(29903); const resultWithLimitAndOffset = await lapisSingleSegmentedSequenceController.postAlignedNucleotideSequence({ @@ -83,44 +74,36 @@ describe('The /alignedNucleotideSequence endpoint', () => { orderBy: [{ field: 'primaryKey', type: 'ascending' }], limit: 2, offset: 1, + dataFormat: 'JSON', }, }); - const { primaryKeys: primaryKeysWithLimitAndOffset, sequences: sequencesWithLimitAndOffset } = - sequenceData(resultWithLimitAndOffset); - - expect(primaryKeysWithLimitAndOffset).to.have.length(2); - expect(sequencesWithLimitAndOffset).to.have.length(2); - expect(primaryKeysWithLimitAndOffset[0]).to.equal(primaryKeysWithLimit[1]); - expect(sequencesWithLimitAndOffset[0]).to.equal(sequencesWithLimit[1]); + expect(resultWithLimitAndOffset).to.have.length(2); + expect(resultWithLimitAndOffset[0]).to.deep.equal(resultWithLimit[1]); }); it('should correctly handle nucleotide insertion requests', async () => { const result = await lapisSingleSegmentedSequenceController.postAlignedNucleotideSequence({ nucleotideSequenceRequest: { nucleotideInsertions: ['ins_25701:CC?', 'ins_5959:?AT'], + dataFormat: 'JSON', }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_3578231'); + expect(result).to.have.length(1); + expect(result[0].primaryKey).to.equal('key_3578231'); }); it('should correctly handle amino acid insertion requests', async () => { const result = await lapisSingleSegmentedSequenceController.postAlignedNucleotideSequence({ nucleotideSequenceRequest: { aminoAcidInsertions: ['ins_S:143:T', 'ins_ORF1a:3602:F?P'], + dataFormat: 'JSON', }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_3259931'); + expect(result).to.have.length(1); + 
expect(result[0].primaryKey).to.equal('key_3259931'); }); it('should return an empty zstd compressed file', async () => { diff --git a/lapis-e2e/test/aminoAcidSequence.spec.ts b/lapis-e2e/test/aminoAcidSequence.spec.ts index 716c96ef..f63171d9 100644 --- a/lapis-e2e/test/aminoAcidSequence.spec.ts +++ b/lapis-e2e/test/aminoAcidSequence.spec.ts @@ -1,47 +1,41 @@ import { expect } from 'chai'; -import { lapisClient, sequenceData } from './common'; +import { lapisClient } from './common'; describe('The /alignedAminoAcidSequence endpoint', () => { it('should return amino acid sequences for Switzerland', async () => { const result = await lapisClient.postAlignedAminoAcidSequence({ gene: 'S', - aminoAcidSequenceRequest: { country: 'Switzerland' }, + aminoAcidSequenceRequest: { country: 'Switzerland', dataFormat: 'JSON' }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_3259931'); - expect(sequences[0]).to.have.length(1274); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_3259931'); + expect(result[0].s).to.have.length(1274); }); it('should order ascending by specified fields', async () => { const result = await lapisClient.postAlignedAminoAcidSequence({ gene: 'S', - aminoAcidSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'ascending' }] }, + aminoAcidSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'ascending' }], dataFormat: 'JSON' }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_1001493'); - expect(sequences[0]).to.have.length(1274); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_1001493'); + expect(result[0].s).to.have.length(1274); }); it('should order descending by specified fields', async () => { 
const result = await lapisClient.postAlignedAminoAcidSequence({ gene: 'S', - aminoAcidSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'descending' }] }, + aminoAcidSequenceRequest: { + orderBy: [{ field: 'primaryKey', type: 'descending' }], + dataFormat: 'JSON', + }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_931279'); - expect(sequences[0]).to.have.length(1274); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_931279'); + expect(result[0].s).to.have.length(1274); }); it('should apply limit and offset', async () => { @@ -50,16 +44,13 @@ describe('The /alignedAminoAcidSequence endpoint', () => { aminoAcidSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'ascending' }], limit: 2, + dataFormat: 'JSON', }, }); - const { primaryKeys: primaryKeysWithLimit, sequences: sequencesWithLimit } = - sequenceData(resultWithLimit); - - expect(primaryKeysWithLimit).to.have.length(2); - expect(sequencesWithLimit).to.have.length(2); - expect(primaryKeysWithLimit[0]).to.equal('>key_1001493'); - expect(sequencesWithLimit[0]).to.have.length(1274); + expect(resultWithLimit).to.have.length(2); + expect(resultWithLimit[0].primaryKey).to.equal('key_1001493'); + expect(resultWithLimit[0].s).to.have.length(1274); const resultWithLimitAndOffset = await lapisClient.postAlignedAminoAcidSequence({ gene: 'S', @@ -67,16 +58,12 @@ describe('The /alignedAminoAcidSequence endpoint', () => { orderBy: [{ field: 'primaryKey', type: 'ascending' }], limit: 2, offset: 1, + dataFormat: 'JSON', }, }); - const { primaryKeys: primaryKeysWithLimitAndOffset, sequences: sequencesWithLimitAndOffset } = - sequenceData(resultWithLimitAndOffset); - - expect(primaryKeysWithLimitAndOffset).to.have.length(2); - expect(sequencesWithLimitAndOffset).to.have.length(2); - 
expect(primaryKeysWithLimitAndOffset[0]).to.equal(primaryKeysWithLimit[1]); - expect(sequencesWithLimitAndOffset[0]).to.equal(sequencesWithLimit[1]); + expect(resultWithLimitAndOffset).to.have.length(2); + expect(resultWithLimitAndOffset[0]).to.deep.equal(resultWithLimit[1]); }); it('should correctly handle nucleotide insertion requests', async () => { @@ -84,15 +71,13 @@ describe('The /alignedAminoAcidSequence endpoint', () => { gene: 'S', aminoAcidSequenceRequest: { nucleotideInsertions: ['ins_25701:CC?', 'ins_5959:?AT'], + dataFormat: 'JSON', }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_3578231'); - expect(sequences[0]).to.have.length(1274); + expect(result).to.have.length(1); + expect(result[0].primaryKey).to.equal('key_3578231'); + expect(result[0].s).to.have.length(1274); }); it('should correctly handle amino acid insertion requests', async () => { @@ -100,13 +85,11 @@ describe('The /alignedAminoAcidSequence endpoint', () => { gene: 'S', aminoAcidSequenceRequest: { aminoAcidInsertions: ['ins_S:143:T', 'ins_ORF1a:3602:F?P'], + dataFormat: 'JSON', }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_3259931'); + expect(result).to.have.length(1); + expect(result[0].primaryKey).to.equal('key_3259931'); }); }); diff --git a/lapis-e2e/test/common.spec.ts b/lapis-e2e/test/common.spec.ts index 93cab158..d4ba2f98 100644 --- a/lapis-e2e/test/common.spec.ts +++ b/lapis-e2e/test/common.spec.ts @@ -1,5 +1,5 @@ import { expect } from 'chai'; -import { basePath, expectIsGzipEncoded, expectIsZstdEncoded } from './common'; +import { basePath, expectIsGzipEncoded, expectIsZstdEncoded, sequenceData } from './common'; const routes = [ { pathSegment: '/aggregated', servesFasta: false, 
expectedDownloadFilename: 'aggregated.json' }, @@ -83,7 +83,25 @@ describe('All endpoints', () => { expect(response.headers.get('lapis-data-version')).to.match(/\d{10}/); }); - if (!route.servesFasta) { + if (route.servesFasta) { + it('should return sequences in fasta format', async () => { + const response = await get(new URLSearchParams({ dataFormat: 'fasta' })); + + const { primaryKeys, sequences } = sequenceData(await response.text()); + + expect(primaryKeys).to.have.length(100); + expect(sequences).to.have.length(100); + }); + + it('should return sequences in ndjson format', async () => { + const response = await get(new URLSearchParams({ dataFormat: 'ndjson' })); + + const lines = (await response.text()).split('\n').filter(line => line.length > 0); + + expect(lines).to.have.length(100); + expect(JSON.parse(lines[0])).to.have.property('primaryKey'); + }); + } else { it('should return the lapis data version header for CSV data', async () => { const response = await get(new URLSearchParams({ dataFormat: 'csv' })); diff --git a/lapis-e2e/test/unalignedNucleotideSequence.spec.ts b/lapis-e2e/test/unalignedNucleotideSequence.spec.ts index f80bc88b..cb4b0685 100644 --- a/lapis-e2e/test/unalignedNucleotideSequence.spec.ts +++ b/lapis-e2e/test/unalignedNucleotideSequence.spec.ts @@ -1,44 +1,41 @@ import { expect } from 'chai'; -import { basePath, lapisSingleSegmentedSequenceController, sequenceData } from './common'; +import { lapisSingleSegmentedSequenceController } from './common'; describe('The /unalignedNucleotideSequence endpoint', () => { it('should return unaligned nucleotide sequences for Switzerland', async () => { const result = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ - nucleotideSequenceRequest: { country: 'Switzerland' }, + nucleotideSequenceRequest: { country: 'Switzerland', dataFormat: 'JSON' }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys, 'primaryKeys').to.have.length(100); 
- expect(sequences, 'sequences').to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_3086369'); - expect(sequences[0]).to.have.length(29903); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_3086369'); + expect(result[0].main).to.have.length(29903); }); it('should order ascending by specified fields', async () => { const result = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ - nucleotideSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'ascending' }] }, + nucleotideSequenceRequest: { + orderBy: [{ field: 'primaryKey', type: 'ascending' }], + dataFormat: 'JSON', + }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_1001493'); - expect(sequences[0]).to.have.length(29903); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_1001493'); + expect(result[0].main).to.have.length(29903); }); it('should order descending by specified fields', async () => { const result = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ - nucleotideSequenceRequest: { orderBy: [{ field: 'primaryKey', type: 'descending' }] }, + nucleotideSequenceRequest: { + orderBy: [{ field: 'primaryKey', type: 'descending' }], + dataFormat: 'JSON', + }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(100); - expect(sequences).to.have.length(100); - expect(primaryKeys[0]).to.equal('>key_931279'); - expect(sequences[0]).to.have.length(29903); + expect(result).to.have.length(100); + expect(result[0].primaryKey).to.equal('key_931279'); + expect(result[0].main).to.have.length(29903); }); it('should apply limit and offset', async () => { @@ -46,16 +43,12 @@ describe('The /unalignedNucleotideSequence endpoint', () => { nucleotideSequenceRequest: { orderBy: [{ field: 'primaryKey', 
type: 'ascending' }], limit: 2, + dataFormat: 'JSON', }, }); - - const { primaryKeys: primaryKeysWithLimit, sequences: sequencesWithLimit } = - sequenceData(resultWithLimit); - - expect(primaryKeysWithLimit).to.have.length(2); - expect(sequencesWithLimit).to.have.length(2); - expect(primaryKeysWithLimit[0]).to.equal('>key_1001493'); - expect(sequencesWithLimit[0]).to.have.length(29903); + expect(resultWithLimit).to.have.length(2); + expect(resultWithLimit[0].primaryKey).to.equal('key_1001493'); + expect(resultWithLimit[0].main).to.have.length(29903); const resultWithLimitAndOffset = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ @@ -63,56 +56,42 @@ describe('The /unalignedNucleotideSequence endpoint', () => { orderBy: [{ field: 'primaryKey', type: 'ascending' }], limit: 2, offset: 1, + dataFormat: 'JSON', }, }); - - const { primaryKeys: primaryKeysWithLimitAndOffset, sequences: sequencesWithLimitAndOffset } = - sequenceData(resultWithLimitAndOffset); - - expect(primaryKeysWithLimitAndOffset).to.have.length(2); - expect(sequencesWithLimitAndOffset).to.have.length(2); - expect(primaryKeysWithLimitAndOffset[0]).to.equal(primaryKeysWithLimit[1]); - expect(sequencesWithLimitAndOffset[0]).to.equal(sequencesWithLimit[1]); + expect(resultWithLimitAndOffset).to.have.length(2); + expect(resultWithLimitAndOffset[0]).to.deep.equal(resultWithLimit[1]); }); it('should correctly handle nucleotide insertion requests', async () => { const result = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ nucleotideSequenceRequest: { nucleotideInsertions: ['ins_25701:CC?', 'ins_5959:?AT'], + dataFormat: 'JSON', }, }); - - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_3578231'); + expect(result).to.have.length(1); + expect(result[0].primaryKey).to.equal('key_3578231'); }); it('should correctly handle 
amino acid insertion requests', async () => { const result = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ nucleotideSequenceRequest: { aminoAcidInsertions: ['ins_S:143:T', 'ins_ORF1a:3602:F?P'], + dataFormat: 'JSON', }, }); - - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_3259931'); + expect(result).to.have.length(1); + expect(result[0].primaryKey).to.equal('key_3259931'); }); it('should return the short sequence', async () => { const result = await lapisSingleSegmentedSequenceController.postUnalignedNucleotideSequence({ - nucleotideSequenceRequest: { primaryKey: 'key_1749899' }, + nucleotideSequenceRequest: { primaryKey: 'key_1749899', dataFormat: 'JSON' }, }); - const { primaryKeys, sequences } = sequenceData(result); - - expect(primaryKeys).to.have.length(1); - expect(sequences).to.have.length(1); - expect(primaryKeys[0]).to.equal('>key_1749899'); - expect(sequences[0]).to.equal('some_very_short_string'); + expect(result).to.have.length(1); + expect(result[0].primaryKey).to.equal('key_1749899'); + expect(result[0].main).to.equal('some_very_short_string'); }); }); diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/ControllerDescriptions.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/ControllerDescriptions.kt index d645deb0..087ae4d6 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/ControllerDescriptions.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/ControllerDescriptions.kt @@ -23,19 +23,19 @@ const val INFO_ENDPOINT_DESCRIPTION = "Returns information about LAPIS." const val DATABASE_CONFIG_ENDPOINT_DESCRIPTION = "Returns the database configuration." const val REFERENCE_GENOME_ENDPOINT_DESCRIPTION = "Returns the reference genome." 
const val ALIGNED_AMINO_ACID_SEQUENCE_ENDPOINT_DESCRIPTION = - """Returns a string of fasta formatted aligned amino acid sequences. Only sequences matching the specified + """Returns a string of aligned amino acid sequences. Only sequences matching the specified sequence filters are considered.""" const val ALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION = - """Returns a string of fasta formatted aligned nucleotide sequences. Only sequences matching the + """Returns a string of aligned nucleotide sequences. Only sequences matching the specified sequence filters are considered.""" const val UNALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION = - """Returns a string of fasta formatted unaligned nucleotide sequences. Only sequences matching the + """Returns a string of unaligned nucleotide sequences. Only sequences matching the specified sequence filters are considered.""" const val ALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION = - """Returns a string of fasta formatted aligned nucleotide sequences of the requested segment. + """Returns a string of aligned nucleotide sequences of the requested segment. Only sequences matching the specified sequence filters are considered.""" const val UNALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION = - """Returns a string of fasta formatted unaligned nucleotide sequences of the requested segment. + """Returns a string of unaligned nucleotide sequences of the requested segment. Only sequences matching the specified sequence filters are considered. Sequences that don't have an unaligned sequence for this segment are omitted.""" @@ -57,11 +57,16 @@ const val LIMIT_DESCRIPTION = """The maximum number of entries to return in the const val OFFSET_DESCRIPTION = """The offset of the first entry to return in the response. This is useful for pagination in combination with \"limit\".""" -const val FORMAT_DESCRIPTION = """The data format of the response. 
+const val DATA_FORMAT_DESCRIPTION = """The data format of the response. Alternatively, the data format can be specified by setting the \"Accept\"-header. You can include the parameter to return the CSV/TSV without headers: "$TEXT_CSV_WITHOUT_HEADERS_VALUE". When both are specified, the request parameter takes precedence over the header.""" +const val SEQUENCES_DATA_FORMAT_DESCRIPTION = """The format of the sequences in the response. +Alternatively, the data format can be specified by setting the \"Accept\"-header. +When both are specified, the request parameter takes precedence over the header. +""" + private const val MAYBE_DESCRIPTION = """ A mutation can be wrapped in a maybe expression "MAYBE(\)" to include sequences with ambiguous symbols at the given position. diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/LapisController.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/LapisController.kt index 3f1db0c0..158a996d 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/LapisController.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/LapisController.kt @@ -40,6 +40,7 @@ import org.genspectrum.lapis.openApi.NucleotideMutations import org.genspectrum.lapis.openApi.Offset import org.genspectrum.lapis.openApi.PrimitiveFieldFilters import org.genspectrum.lapis.openApi.REQUEST_SCHEMA_WITH_MIN_PROPORTION +import org.genspectrum.lapis.openApi.SequencesDataFormat import org.genspectrum.lapis.openApi.StringResponseOperation import org.genspectrum.lapis.request.AminoAcidInsertion import org.genspectrum.lapis.request.AminoAcidMutation @@ -66,7 +67,7 @@ import org.genspectrum.lapis.response.DetailsData import org.genspectrum.lapis.response.LapisResponse import org.genspectrum.lapis.response.NucleotideInsertionResponse import org.genspectrum.lapis.response.NucleotideMutationResponse -import org.genspectrum.lapis.response.writeFastaTo +import org.genspectrum.lapis.response.SequencesStreamer import 
org.genspectrum.lapis.silo.DataVersion import org.genspectrum.lapis.silo.SequenceType import org.springframework.http.HttpHeaders @@ -90,6 +91,7 @@ class LapisController( private val csvWriter: CsvWriter, private val fieldConverter: FieldConverter, private val dataVersion: DataVersion, + private val sequencesStreamer: SequencesStreamer, ) { @GetMapping(AGGREGATED_ROUTE, produces = [MediaType.APPLICATION_JSON_VALUE]) @LapisAggregatedResponse @@ -1403,7 +1405,10 @@ class LapisController( writeCsvToResponse(response, request, httpHeaders.accept, TAB, siloQueryModel::getAminoAcidInsertions) } - @GetMapping("$ALIGNED_AMINO_ACID_SEQUENCES_ROUTE/{gene}", produces = [TEXT_X_FASTA_VALUE]) + @GetMapping( + "$ALIGNED_AMINO_ACID_SEQUENCES_ROUTE/{gene}", + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], + ) @LapisAlignedAminoAcidSequenceResponse fun getAlignedAminoAcidSequence( @PathVariable(name = "gene", required = true) @@ -1433,6 +1438,10 @@ class LapisController( @Offset @RequestParam offset: Int? = null, + @SequencesDataFormat + @RequestParam + dataFormat: String? 
= null, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { val request = SequenceFiltersRequest( @@ -1448,13 +1457,19 @@ class LapisController( requestContext.filter = request - return siloQueryModel.getGenomicSequence(request, SequenceType.ALIGNED, gene) - .writeFastaTo(response, dataVersion) + siloQueryModel.getGenomicSequence(request, SequenceType.ALIGNED, gene) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } @PostMapping( "$ALIGNED_AMINO_ACID_SEQUENCES_ROUTE/{gene}", - produces = [TEXT_X_FASTA_VALUE], + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], consumes = [MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_FORM_URLENCODED_VALUE], ) @LapisAlignedAminoAcidSequenceResponse @@ -1465,12 +1480,19 @@ class LapisController( @Parameter(schema = Schema(ref = "#/components/schemas/$ALIGNED_AMINO_ACID_SEQUENCE_REQUEST_SCHEMA")) @RequestBody request: SequenceFiltersRequest, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { requestContext.filter = request - return siloQueryModel.getGenomicSequence(request, SequenceType.ALIGNED, gene) - .writeFastaTo(response, dataVersion) + siloQueryModel.getGenomicSequence(request, SequenceType.ALIGNED, gene) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } private fun writeCsvToResponse( diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceController.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceController.kt index 3fd8fcc1..4bad23f8 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceController.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceController.kt @@ -9,8 +9,7 @@ import 
org.genspectrum.lapis.logging.RequestContext import org.genspectrum.lapis.model.SiloQueryModel import org.genspectrum.lapis.openApi.AminoAcidInsertions import org.genspectrum.lapis.openApi.AminoAcidMutations -import org.genspectrum.lapis.openApi.LapisAlignedMultiSegmentedNucleotideSequenceResponse -import org.genspectrum.lapis.openApi.LapisUnalignedMultiSegmentedNucleotideSequenceResponse +import org.genspectrum.lapis.openApi.LapisNucleotideSequenceResponse import org.genspectrum.lapis.openApi.Limit import org.genspectrum.lapis.openApi.NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA import org.genspectrum.lapis.openApi.NucleotideInsertions @@ -19,6 +18,7 @@ import org.genspectrum.lapis.openApi.NucleotideSequencesOrderByFields import org.genspectrum.lapis.openApi.Offset import org.genspectrum.lapis.openApi.PrimitiveFieldFilters import org.genspectrum.lapis.openApi.Segment +import org.genspectrum.lapis.openApi.SequencesDataFormat import org.genspectrum.lapis.request.AminoAcidInsertion import org.genspectrum.lapis.request.AminoAcidMutation import org.genspectrum.lapis.request.GetRequestSequenceFilters @@ -27,15 +27,16 @@ import org.genspectrum.lapis.request.NucleotideMutation import org.genspectrum.lapis.request.OrderByField import org.genspectrum.lapis.request.SPECIAL_REQUEST_PROPERTIES import org.genspectrum.lapis.request.SequenceFiltersRequest -import org.genspectrum.lapis.response.writeFastaTo -import org.genspectrum.lapis.silo.DataVersion +import org.genspectrum.lapis.response.SequencesStreamer import org.genspectrum.lapis.silo.SequenceType import org.springframework.boot.autoconfigure.condition.ConditionalOnExpression +import org.springframework.http.HttpHeaders import org.springframework.http.MediaType import org.springframework.web.bind.annotation.GetMapping import org.springframework.web.bind.annotation.PathVariable import org.springframework.web.bind.annotation.PostMapping import org.springframework.web.bind.annotation.RequestBody +import 
org.springframework.web.bind.annotation.RequestHeader import org.springframework.web.bind.annotation.RequestMapping import org.springframework.web.bind.annotation.RequestParam import org.springframework.web.bind.annotation.RestController @@ -49,10 +50,15 @@ const val IS_MULTI_SEGMENT_SEQUENCE_EXPRESSION = class MultiSegmentedSequenceController( private val siloQueryModel: SiloQueryModel, private val requestContext: RequestContext, - private val dataVersion: DataVersion, + private val sequencesStreamer: SequencesStreamer, ) { - @GetMapping("$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/{segment}", produces = [TEXT_X_FASTA_VALUE]) - @LapisAlignedMultiSegmentedNucleotideSequenceResponse + @GetMapping( + "$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/{segment}", + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], + ) + @LapisNucleotideSequenceResponse( + description = ALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun getAlignedNucleotideSequence( @PathVariable(name = "segment", required = true) @Segment @@ -81,6 +87,10 @@ class MultiSegmentedSequenceController( @Offset @RequestParam offset: Int? = null, + @SequencesDataFormat + @RequestParam + dataFormat: String? 
= null, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { val request = SequenceFiltersRequest( @@ -96,20 +106,28 @@ class MultiSegmentedSequenceController( requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.ALIGNED, segment, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } @PostMapping( "$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/{segment}", - produces = [TEXT_X_FASTA_VALUE], + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], consumes = [MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_FORM_URLENCODED_VALUE], ) - @LapisAlignedMultiSegmentedNucleotideSequenceResponse + @LapisNucleotideSequenceResponse( + description = ALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun postAlignedNucleotideSequence( @PathVariable(name = "segment", required = true) @Segment @@ -117,20 +135,32 @@ class MultiSegmentedSequenceController( @Parameter(schema = Schema(ref = "#/components/schemas/$NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA")) @RequestBody request: SequenceFiltersRequest, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.ALIGNED, segment, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } - @GetMapping("$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/{segment}", produces = [TEXT_X_FASTA_VALUE]) - @LapisUnalignedMultiSegmentedNucleotideSequenceResponse + @GetMapping( + "$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/{segment}", + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, 
MediaType.APPLICATION_NDJSON_VALUE], + ) + @LapisNucleotideSequenceResponse( + description = UNALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun getUnalignedNucleotideSequence( @PathVariable(name = "segment", required = true) @Segment @@ -159,6 +189,10 @@ class MultiSegmentedSequenceController( @Offset @RequestParam offset: Int? = null, + @SequencesDataFormat + @RequestParam + dataFormat: String? = null, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { val request = SequenceFiltersRequest( @@ -174,20 +208,28 @@ class MultiSegmentedSequenceController( requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.UNALIGNED, segment, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } @PostMapping( "$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/{segment}", - produces = [TEXT_X_FASTA_VALUE], + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], consumes = [MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_FORM_URLENCODED_VALUE], ) - @LapisUnalignedMultiSegmentedNucleotideSequenceResponse + @LapisNucleotideSequenceResponse( + description = UNALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun postUnalignedNucleotideSequence( @PathVariable(name = "segment", required = true) @Segment @@ -195,15 +237,22 @@ class MultiSegmentedSequenceController( @Parameter(schema = Schema(ref = "#/components/schemas/$NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA")) @RequestBody request: SequenceFiltersRequest, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.UNALIGNED, segment, ) - .writeFastaTo(response, dataVersion) + .also { + 
sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } } diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceController.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceController.kt index 74c0ff2b..4b492dff 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceController.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceController.kt @@ -10,8 +10,7 @@ import org.genspectrum.lapis.logging.RequestContext import org.genspectrum.lapis.model.SiloQueryModel import org.genspectrum.lapis.openApi.AminoAcidInsertions import org.genspectrum.lapis.openApi.AminoAcidMutations -import org.genspectrum.lapis.openApi.LapisAlignedSingleSegmentedNucleotideSequenceResponse -import org.genspectrum.lapis.openApi.LapisUnalignedSingleSegmentedNucleotideSequenceResponse +import org.genspectrum.lapis.openApi.LapisNucleotideSequenceResponse import org.genspectrum.lapis.openApi.Limit import org.genspectrum.lapis.openApi.NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA import org.genspectrum.lapis.openApi.NucleotideInsertions @@ -19,6 +18,7 @@ import org.genspectrum.lapis.openApi.NucleotideMutations import org.genspectrum.lapis.openApi.NucleotideSequencesOrderByFields import org.genspectrum.lapis.openApi.Offset import org.genspectrum.lapis.openApi.PrimitiveFieldFilters +import org.genspectrum.lapis.openApi.SequencesDataFormat import org.genspectrum.lapis.request.AminoAcidInsertion import org.genspectrum.lapis.request.AminoAcidMutation import org.genspectrum.lapis.request.GetRequestSequenceFilters @@ -27,14 +27,15 @@ import org.genspectrum.lapis.request.NucleotideMutation import org.genspectrum.lapis.request.OrderByField import org.genspectrum.lapis.request.SPECIAL_REQUEST_PROPERTIES import org.genspectrum.lapis.request.SequenceFiltersRequest -import org.genspectrum.lapis.response.writeFastaTo -import 
org.genspectrum.lapis.silo.DataVersion +import org.genspectrum.lapis.response.SequencesStreamer import org.genspectrum.lapis.silo.SequenceType import org.springframework.boot.autoconfigure.condition.ConditionalOnExpression +import org.springframework.http.HttpHeaders import org.springframework.http.MediaType import org.springframework.web.bind.annotation.GetMapping import org.springframework.web.bind.annotation.PostMapping import org.springframework.web.bind.annotation.RequestBody +import org.springframework.web.bind.annotation.RequestHeader import org.springframework.web.bind.annotation.RequestMapping import org.springframework.web.bind.annotation.RequestParam import org.springframework.web.bind.annotation.RestController @@ -49,10 +50,15 @@ class SingleSegmentedSequenceController( private val siloQueryModel: SiloQueryModel, private val requestContext: RequestContext, private val referenceGenomeSchema: ReferenceGenomeSchema, - private val dataVersion: DataVersion, + private val sequencesStreamer: SequencesStreamer, ) { - @GetMapping(ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE, produces = [TEXT_X_FASTA_VALUE]) - @LapisAlignedSingleSegmentedNucleotideSequenceResponse + @GetMapping( + ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE, + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], + ) + @LapisNucleotideSequenceResponse( + description = ALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun getAlignedNucleotideSequences( @PrimitiveFieldFilters @RequestParam @@ -78,6 +84,10 @@ class SingleSegmentedSequenceController( @Offset @RequestParam offset: Int? = null, + @SequencesDataFormat + @RequestParam + dataFormat: String? 
= null, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { val request = SequenceFiltersRequest( @@ -93,38 +103,58 @@ class SingleSegmentedSequenceController( requestContext.filter = request - val genomicSequence = siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.ALIGNED, referenceGenomeSchema.nucleotideSequences[0].name, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } @PostMapping( ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE, - produces = [TEXT_X_FASTA_VALUE], + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], consumes = [MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_FORM_URLENCODED_VALUE], ) - @LapisAlignedSingleSegmentedNucleotideSequenceResponse + @LapisNucleotideSequenceResponse( + description = ALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun postAlignedNucleotideSequence( @Parameter(schema = Schema(ref = "#/components/schemas/$NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA")) @RequestBody request: SequenceFiltersRequest, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.ALIGNED, referenceGenomeSchema.nucleotideSequences[0].name, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } - @GetMapping(UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE, produces = [TEXT_X_FASTA_VALUE]) - @LapisUnalignedSingleSegmentedNucleotideSequenceResponse + @GetMapping( + UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE, + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], + ) + @LapisNucleotideSequenceResponse( + 
description = UNALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun getUnalignedNucleotideSequences( @PrimitiveFieldFilters @RequestParam @@ -150,6 +180,10 @@ class SingleSegmentedSequenceController( @Offset @RequestParam offset: Int? = null, + @SequencesDataFormat + @RequestParam + dataFormat: String? = null, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { val request = SequenceFiltersRequest( @@ -165,33 +199,48 @@ class SingleSegmentedSequenceController( requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.UNALIGNED, referenceGenomeSchema.nucleotideSequences[0].name, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } @PostMapping( UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE, - produces = [TEXT_X_FASTA_VALUE], + produces = [TEXT_X_FASTA_VALUE, MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_NDJSON_VALUE], consumes = [MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_FORM_URLENCODED_VALUE], ) - @LapisUnalignedSingleSegmentedNucleotideSequenceResponse + @LapisNucleotideSequenceResponse( + description = UNALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, + ) fun postUnalignedNucleotideSequence( @Parameter(schema = Schema(ref = "#/components/schemas/$NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA")) @RequestBody request: SequenceFiltersRequest, + @RequestHeader httpHeaders: HttpHeaders, response: HttpServletResponse, ) { requestContext.filter = request - return siloQueryModel.getGenomicSequence( + siloQueryModel.getGenomicSequence( request, SequenceType.UNALIGNED, referenceGenomeSchema.nucleotideSequences[0].name, ) - .writeFastaTo(response, dataVersion) + .also { + sequencesStreamer.stream( + sequenceData = it, + response = response, + acceptHeaders = httpHeaders.accept, + ) + } } } diff --git 
a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DataFormatParameterFilter.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DataFormatParameterFilter.kt index ee5c4a4d..972b4560 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DataFormatParameterFilter.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DataFormatParameterFilter.kt @@ -23,6 +23,12 @@ object DataFormat { const val TSV = "TSV" } +object SequencesDataFormat { + const val JSON = DataFormat.JSON + const val FASTA = "FASTA" + const val NDJSON = "NDJSON" +} + @Component @Order(DATA_FORMAT_FILTER_ORDER) class DataFormatParameterFilter(val objectMapper: ObjectMapper) : OncePerRequestFilter() { @@ -51,6 +57,9 @@ class DataFormatParameterFilter(val objectMapper: ObjectMapper) : OncePerRequest DataFormat.CSV_WITHOUT_HEADERS -> LapisMediaType.TEXT_CSV_WITHOUT_HEADERS_VALUE DataFormat.TSV -> LapisMediaType.TEXT_TSV_VALUE DataFormat.JSON -> MediaType.APPLICATION_JSON_VALUE + SequencesDataFormat.FASTA -> LapisMediaType.TEXT_X_FASTA_VALUE + SequencesDataFormat.NDJSON -> MediaType.APPLICATION_NDJSON_VALUE + else -> null } } diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DownloadAsFileFilter.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DownloadAsFileFilter.kt index a7f440d1..64f42f97 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DownloadAsFileFilter.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/controller/middleware/DownloadAsFileFilter.kt @@ -6,6 +6,7 @@ import jakarta.servlet.http.HttpServletRequest import jakarta.servlet.http.HttpServletResponse import org.genspectrum.lapis.controller.LapisMediaType.TEXT_CSV import org.genspectrum.lapis.controller.LapisMediaType.TEXT_TSV +import org.genspectrum.lapis.controller.LapisMediaType.TEXT_X_FASTA import org.genspectrum.lapis.controller.SampleRoute import 
org.genspectrum.lapis.request.DOWNLOAD_AS_FILE_PROPERTY import org.genspectrum.lapis.request.DOWNLOAD_FILE_BASENAME_PROPERTY @@ -59,14 +60,23 @@ class DownloadAsFileFilter( request: CachedBodyHttpServletRequest, matchingRoute: SampleRoute?, ): String { - if (matchingRoute?.servesFasta == true) { - return ".fasta" - } - val acceptHeaders = request.getHeader(ACCEPT) ?.let { MediaType.parseMediaTypes(it) } ?.sortedByDescending { it.qualityValue } ?: emptyList() + + if (matchingRoute?.servesFasta == true) { + for (acceptHeader in acceptHeaders) { + if (acceptHeader.equalsTypeAndSubtype(TEXT_X_FASTA)) { + return ".fasta" + } else if (acceptHeader.equalsTypeAndSubtype(MediaType.APPLICATION_NDJSON)) { + return ".ndjson" + } else if (acceptHeader.equalsTypeAndSubtype(MediaType.APPLICATION_JSON)) { + return ".json" + } + } + return ".fasta" + } for (acceptHeader in acceptHeaders) { if (acceptHeader.equalsTypeAndSubtype(TEXT_TSV)) { return ".tsv" diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/OpenApiDocs.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/OpenApiDocs.kt index c5ae73da..c2d63c0f 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/OpenApiDocs.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/OpenApiDocs.kt @@ -10,21 +10,25 @@ import io.swagger.v3.oas.models.media.Schema import io.swagger.v3.oas.models.media.StringSchema import org.genspectrum.lapis.config.DatabaseConfig import org.genspectrum.lapis.config.DatabaseMetadata +import org.genspectrum.lapis.config.DatabaseSchema import org.genspectrum.lapis.config.MetadataType import org.genspectrum.lapis.config.OpennessLevel import org.genspectrum.lapis.config.ReferenceGenomeSchema +import org.genspectrum.lapis.config.ReferenceSequenceSchema import org.genspectrum.lapis.config.SequenceFilterFieldName import org.genspectrum.lapis.config.SequenceFilterFieldType import org.genspectrum.lapis.config.SequenceFilterFields import 
org.genspectrum.lapis.controller.AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION import org.genspectrum.lapis.controller.AMINO_ACID_MUTATION_DESCRIPTION +import org.genspectrum.lapis.controller.DATA_FORMAT_DESCRIPTION import org.genspectrum.lapis.controller.DETAILS_FIELDS_DESCRIPTION -import org.genspectrum.lapis.controller.FORMAT_DESCRIPTION import org.genspectrum.lapis.controller.LIMIT_DESCRIPTION import org.genspectrum.lapis.controller.NUCLEOTIDE_MUTATION_DESCRIPTION import org.genspectrum.lapis.controller.OFFSET_DESCRIPTION +import org.genspectrum.lapis.controller.SEQUENCES_DATA_FORMAT_DESCRIPTION import org.genspectrum.lapis.controller.middleware.Compression import org.genspectrum.lapis.controller.middleware.DataFormat +import org.genspectrum.lapis.controller.middleware.SequencesDataFormat import org.genspectrum.lapis.request.ACCESS_KEY_PROPERTY import org.genspectrum.lapis.request.AMINO_ACID_INSERTIONS_PROPERTY import org.genspectrum.lapis.request.AMINO_ACID_MUTATIONS_PROPERTY @@ -70,9 +74,10 @@ fun buildOpenApiSchema( .description("valid filters for sequence data") .properties( getSequenceFiltersWithFormat( - databaseConfig, - sequenceFilterFields, - mutationsOrderByFieldsEnum(), + databaseConfig = databaseConfig, + sequenceFilterFields = sequenceFilterFields, + orderByFieldsSchema = mutationsOrderByFieldsEnum(), + dataFormatSchema = dataFormatSchema(), ) + Pair(MIN_PROPORTION_PROPERTY, Schema().type("number")), ), ) @@ -80,9 +85,10 @@ fun buildOpenApiSchema( AGGREGATED_REQUEST_SCHEMA, requestSchemaWithFields( getSequenceFiltersWithFormat( - databaseConfig, - sequenceFilterFields, - aggregatedOrderByFieldsEnum(databaseConfig), + databaseConfig = databaseConfig, + sequenceFilterFields = sequenceFilterFields, + orderByFieldsSchema = aggregatedOrderByFieldsEnum(databaseConfig), + dataFormatSchema = dataFormatSchema(), ), AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION, databaseConfig.schema.metadata, @@ -92,9 +98,10 @@ fun buildOpenApiSchema( DETAILS_REQUEST_SCHEMA, 
requestSchemaWithFields( getSequenceFiltersWithFormat( - databaseConfig, - sequenceFilterFields, - detailsOrderByFieldsEnum(databaseConfig), + databaseConfig = databaseConfig, + sequenceFilterFields = sequenceFilterFields, + orderByFieldsSchema = detailsOrderByFieldsEnum(databaseConfig), + dataFormatSchema = dataFormatSchema(), ), DETAILS_FIELDS_DESCRIPTION, databaseConfig.schema.metadata, @@ -104,29 +111,38 @@ fun buildOpenApiSchema( INSERTIONS_REQUEST_SCHEMA, requestSchemaForCommonSequenceFilters( getSequenceFiltersWithFormat( - databaseConfig, - sequenceFilterFields, - insertionsOrderByFieldsEnum(), + databaseConfig = databaseConfig, + sequenceFilterFields = sequenceFilterFields, + orderByFieldsSchema = insertionsOrderByFieldsEnum(), + dataFormatSchema = dataFormatSchema(), ), ), ) .addSchemas( ALIGNED_AMINO_ACID_SEQUENCE_REQUEST_SCHEMA, requestSchemaForCommonSequenceFilters( - getSequenceFilters( - databaseConfig, - sequenceFilterFields, - aminoAcidSequenceOrderByFieldsEnum(referenceGenomeSchema, databaseConfig), + getSequenceFiltersWithFormat( + databaseConfig = databaseConfig, + sequenceFilterFields = sequenceFilterFields, + orderByFieldsSchema = aminoAcidSequenceOrderByFieldsEnum( + referenceGenomeSchema, + databaseConfig, + ), + dataFormatSchema = sequencesFormatSchema(), ), ), ) .addSchemas( NUCLEOTIDE_SEQUENCE_REQUEST_SCHEMA, requestSchemaForCommonSequenceFilters( - getSequenceFilters( - databaseConfig, - sequenceFilterFields, - nucleotideSequenceOrderByFieldsEnum(referenceGenomeSchema, databaseConfig), + getSequenceFiltersWithFormat( + databaseConfig = databaseConfig, + sequenceFilterFields = sequenceFilterFields, + orderByFieldsSchema = nucleotideSequenceOrderByFieldsEnum( + referenceGenomeSchema, + databaseConfig, + ), + dataFormatSchema = sequencesFormatSchema(), ), ), ) @@ -208,6 +224,20 @@ fun buildOpenApiSchema( .required(aminoAcidInsertionSchema().keys.toList()), ), ) + .addSchemas( + NUCLEOTIDE_SEQUENCES_RESPONSE_SCHEMA, + sequencesResponse( + 
databaseConfig.schema, + referenceGenomeSchema.nucleotideSequences, + ), + ) + .addSchemas( + AMINO_ACID_SEQUENCES_RESPONSE_SCHEMA, + sequencesResponse( + databaseConfig.schema, + referenceGenomeSchema.genes, + ), + ) .addSchemas(FIELDS_TO_AGGREGATE_BY_SCHEMA, fieldsArray(databaseConfig.schema.metadata)) .addSchemas(DETAILS_FIELDS_SCHEMA, fieldsArray(databaseConfig.schema.metadata)) .addSchemas(AMINO_ACID_MUTATIONS_SCHEMA, aminoAcidMutations()) @@ -241,7 +271,8 @@ fun buildOpenApiSchema( .addSchemas(GENE_SCHEMA, fieldsEnum(additionalFields = referenceGenomeSchema.genes.map { it.name })) .addSchemas(LIMIT_SCHEMA, limitSchema()) .addSchemas(OFFSET_SCHEMA, offsetSchema()) - .addSchemas(FORMAT_SCHEMA, formatSchema()), + .addSchemas(SEQUENCES_FORMAT_SCHEMA, sequencesFormatSchema()) + .addSchemas(FORMAT_SCHEMA, dataFormatSchema()), ) } @@ -249,9 +280,10 @@ private fun getSequenceFiltersWithFormat( databaseConfig: DatabaseConfig, sequenceFilterFields: SequenceFilterFields, orderByFieldsSchema: Schema, + dataFormatSchema: Schema<*>, ): Map> = getSequenceFilters(databaseConfig, sequenceFilterFields, orderByFieldsSchema) + - Pair(FORMAT_PROPERTY, formatSchema()) + Pair(FORMAT_PROPERTY, dataFormatSchema) private fun getSequenceFilters( databaseConfig: DatabaseConfig, @@ -602,15 +634,21 @@ private fun offsetSchema() = .type("integer") .description(OFFSET_DESCRIPTION) -private fun formatSchema() = +private fun dataFormatSchema() = Schema() .type("string") .description( - FORMAT_DESCRIPTION, + DATA_FORMAT_DESCRIPTION, ) ._enum(listOf(DataFormat.JSON, DataFormat.CSV, DataFormat.CSV_WITHOUT_HEADERS, DataFormat.TSV)) ._default(DataFormat.JSON) +private fun sequencesFormatSchema() = + Schema() + .type("string") + .description(SEQUENCES_DATA_FORMAT_DESCRIPTION) + ._enum(listOf(SequencesDataFormat.FASTA, SequencesDataFormat.JSON, SequencesDataFormat.NDJSON)) + private fun fieldsArray( databaseConfig: List, additionalFields: List = emptyList(), @@ -664,3 +702,38 @@ private fun 
logicalOrArraySchema(schema: Schema) = private fun arraySchema(schema: Schema) = ArraySchema() .items(schema) + +private fun sequencesResponse( + schema: DatabaseSchema, + referenceSequenceSchemas: List, +): Schema<*> { + val baseSchema = Schema() + .type("object") + .addProperty(schema.primaryKey, Schema().type("string")) + .addRequiredItem(schema.primaryKey) + + return when (referenceSequenceSchemas.size == 1) { + true -> + baseSchema + .addProperty( + referenceSequenceSchemas[0].name, + Schema() + .type("string") + .description("The sequence data."), + ) + .addRequiredItem(referenceSequenceSchemas[0].name) + .description("An object containing the primary key and the requested sequence of a sample.") + + false -> { + for (nucleotideSequence in referenceSequenceSchemas) { + baseSchema.addProperty(nucleotideSequence.name, Schema().type("string")) + } + baseSchema + .description( + "An object containing the primary key and the requested sequence of a sample. " + + "This object always contains two exactly keys. 
" + + "Only the requested sequence is contained in the response, the others are omitted.", + ) + } + } +} diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/Schemas.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/Schemas.kt index 99e2ca05..e71fd519 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/Schemas.kt +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/openApi/Schemas.kt @@ -5,6 +5,7 @@ import io.swagger.v3.oas.annotations.Parameter import io.swagger.v3.oas.annotations.enums.Explode import io.swagger.v3.oas.annotations.enums.ParameterStyle import io.swagger.v3.oas.annotations.headers.Header +import io.swagger.v3.oas.annotations.media.ArraySchema import io.swagger.v3.oas.annotations.media.Content import io.swagger.v3.oas.annotations.media.Schema import io.swagger.v3.oas.annotations.responses.ApiResponse @@ -12,25 +13,24 @@ import org.genspectrum.lapis.controller.AGGREGATED_ENDPOINT_DESCRIPTION import org.genspectrum.lapis.controller.AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION import org.genspectrum.lapis.controller.AGGREGATED_ORDER_BY_FIELDS_DESCRIPTION import org.genspectrum.lapis.controller.ALIGNED_AMINO_ACID_SEQUENCE_ENDPOINT_DESCRIPTION -import org.genspectrum.lapis.controller.ALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION -import org.genspectrum.lapis.controller.ALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION import org.genspectrum.lapis.controller.AMINO_ACID_INSERTIONS_ENDPOINT_DESCRIPTION import org.genspectrum.lapis.controller.AMINO_ACID_MUTATIONS_ENDPOINT_DESCRIPTION +import org.genspectrum.lapis.controller.DATA_FORMAT_DESCRIPTION import org.genspectrum.lapis.controller.DETAILS_ENDPOINT_DESCRIPTION import org.genspectrum.lapis.controller.DETAILS_FIELDS_DESCRIPTION import org.genspectrum.lapis.controller.DETAILS_ORDER_BY_FIELDS_DESCRIPTION -import org.genspectrum.lapis.controller.FORMAT_DESCRIPTION import org.genspectrum.lapis.controller.LIMIT_DESCRIPTION import 
org.genspectrum.lapis.controller.LapisHeaders.LAPIS_DATA_VERSION import org.genspectrum.lapis.controller.LapisHeaders.REQUEST_ID +import org.genspectrum.lapis.controller.LapisMediaType import org.genspectrum.lapis.controller.NUCLEOTIDE_INSERTIONS_ENDPOINT_DESCRIPTION import org.genspectrum.lapis.controller.NUCLEOTIDE_MUTATION_ENDPOINT_DESCRIPTION import org.genspectrum.lapis.controller.OFFSET_DESCRIPTION -import org.genspectrum.lapis.controller.UNALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION -import org.genspectrum.lapis.controller.UNALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION +import org.genspectrum.lapis.controller.SEQUENCES_DATA_FORMAT_DESCRIPTION import org.springframework.core.annotation.AliasFor import org.springframework.http.HttpHeaders.ACCEPT_ENCODING import org.springframework.http.HttpHeaders.CONTENT_DISPOSITION +import org.springframework.http.MediaType const val PRIMITIVE_FIELD_FILTERS_SCHEMA = "SequenceFilters" const val REQUEST_SCHEMA_WITH_MIN_PROPORTION = "SequenceFiltersWithMinProportion" @@ -46,6 +46,8 @@ const val NUCLEOTIDE_MUTATIONS_RESPONSE_SCHEMA = "NucleotideMutationsResponse" const val AMINO_ACID_MUTATIONS_RESPONSE_SCHEMA = "AminoAcidMutationsResponse" const val NUCLEOTIDE_INSERTIONS_RESPONSE_SCHEMA = "NucleotideInsertionsResponse" const val AMINO_ACID_INSERTIONS_RESPONSE_SCHEMA = "AminoAcidInsertionsResponse" +const val NUCLEOTIDE_SEQUENCES_RESPONSE_SCHEMA = "NucleotideSequencesResponse" +const val AMINO_ACID_SEQUENCES_RESPONSE_SCHEMA = "AminoAcidSequencesResponse" const val NUCLEOTIDE_MUTATIONS_SCHEMA = "NucleotideMutations" const val AMINO_ACID_MUTATIONS_SCHEMA = "AminoAcidMutations" @@ -61,6 +63,7 @@ const val NUCLEOTIDE_SEQUENCES_ORDER_BY_FIELDS_SCHEMA = "NucleotideSequencesOrde const val LIMIT_SCHEMA = "Limit" const val OFFSET_SCHEMA = "Offset" const val FORMAT_SCHEMA = "DataFormat" +const val SEQUENCES_FORMAT_SCHEMA = "SequencesDataFormat" const val FIELDS_TO_AGGREGATE_BY_SCHEMA = 
"FieldsToAggregateBy" const val DETAILS_FIELDS_SCHEMA = "DetailsFields" const val GENE_SCHEMA = "Gene" @@ -132,7 +135,7 @@ the other also grants access to detailed data. ) annotation class LapisResponseAnnotation( @get:AliasFor(annotation = Operation::class, attribute = "description") - val description: String, + val description: String = "", @get:AliasFor(annotation = ApiResponse::class, attribute = "content") val content: Array = [], ) @@ -219,41 +222,45 @@ annotation class LapisAminoAcidInsertionsResponse @Retention(AnnotationRetention.RUNTIME) @LapisResponseAnnotation( description = ALIGNED_AMINO_ACID_SEQUENCE_ENDPOINT_DESCRIPTION, - content = [Content(schema = Schema(type = "string"))], + content = [ + Content(mediaType = LapisMediaType.TEXT_X_FASTA_VALUE, schema = Schema(type = "string")), + Content( + mediaType = MediaType.APPLICATION_JSON_VALUE, + array = ArraySchema(schema = Schema(ref = "#/components/schemas/$AMINO_ACID_SEQUENCES_RESPONSE_SCHEMA")), + ), + Content( + mediaType = MediaType.APPLICATION_NDJSON_VALUE, + schema = Schema( + description = "An NDJSON stream of nucleotide sequences. 
The schema is to be understood per line", + ref = "#/components/schemas/$AMINO_ACID_SEQUENCES_RESPONSE_SCHEMA", + ), + ), + ], ) annotation class LapisAlignedAminoAcidSequenceResponse @Target(AnnotationTarget.FUNCTION) @Retention(AnnotationRetention.RUNTIME) @LapisResponseAnnotation( - description = ALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, - content = [Content(schema = Schema(type = "string"))], -) -annotation class LapisAlignedSingleSegmentedNucleotideSequenceResponse - -@Target(AnnotationTarget.FUNCTION) -@Retention(AnnotationRetention.RUNTIME) -@LapisResponseAnnotation( - description = UNALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, - content = [Content(schema = Schema(type = "string"))], -) -annotation class LapisUnalignedSingleSegmentedNucleotideSequenceResponse - -@Target(AnnotationTarget.FUNCTION) -@Retention(AnnotationRetention.RUNTIME) -@LapisResponseAnnotation( - description = ALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, - content = [Content(schema = Schema(type = "string"))], + content = [ + Content(mediaType = LapisMediaType.TEXT_X_FASTA_VALUE, schema = Schema(type = "string")), + Content( + mediaType = MediaType.APPLICATION_JSON_VALUE, + array = ArraySchema(schema = Schema(ref = "#/components/schemas/$NUCLEOTIDE_SEQUENCES_RESPONSE_SCHEMA")), + ), + Content( + mediaType = MediaType.APPLICATION_NDJSON_VALUE, + schema = Schema( + description = "An NDJSON stream of nucleotide sequences. 
The schema is to be understood per line", + ref = "#/components/schemas/$NUCLEOTIDE_SEQUENCES_RESPONSE_SCHEMA", + ), + ), + ], ) -annotation class LapisAlignedMultiSegmentedNucleotideSequenceResponse - -@Target(AnnotationTarget.FUNCTION) -@Retention(AnnotationRetention.RUNTIME) -@LapisResponseAnnotation( - description = UNALIGNED_MULTI_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION, - content = [Content(schema = Schema(type = "string"))], +annotation class LapisNucleotideSequenceResponse( + @get:AliasFor(annotation = LapisResponseAnnotation::class, attribute = "description") + val description: String = "", ) -annotation class LapisUnalignedMultiSegmentedNucleotideSequenceResponse @Target(AnnotationTarget.FUNCTION) @Retention(AnnotationRetention.RUNTIME) @@ -276,7 +283,7 @@ annotation class StringResponseOperation( @Retention(AnnotationRetention.RUNTIME) @Parameter( description = - "Valid filters for sequence data. This may be empty. Only provide the fields that should be filtered by.", + "Valid filters for sequence data. This may be empty. 
Only provide the fields that should be filtered by.", schema = Schema(ref = "#/components/schemas/$PRIMITIVE_FIELD_FILTERS_SCHEMA"), explode = Explode.TRUE, style = ParameterStyle.FORM, @@ -374,10 +381,18 @@ annotation class Offset @Retention(AnnotationRetention.RUNTIME) @Parameter( schema = Schema(ref = "#/components/schemas/$FORMAT_SCHEMA"), - description = FORMAT_DESCRIPTION, + description = DATA_FORMAT_DESCRIPTION, ) annotation class DataFormat +@Target(AnnotationTarget.VALUE_PARAMETER) +@Retention(AnnotationRetention.RUNTIME) +@Parameter( + schema = Schema(ref = "#/components/schemas/$SEQUENCES_FORMAT_SCHEMA"), + description = SEQUENCES_DATA_FORMAT_DESCRIPTION, +) +annotation class SequencesDataFormat + @Target(AnnotationTarget.VALUE_PARAMETER) @Retention(AnnotationRetention.RUNTIME) @Parameter( diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/response/SequencesStreamer.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/response/SequencesStreamer.kt new file mode 100644 index 00000000..09b9607d --- /dev/null +++ b/lapis/src/main/kotlin/org/genspectrum/lapis/response/SequencesStreamer.kt @@ -0,0 +1,96 @@ +package org.genspectrum.lapis.response + +import com.fasterxml.jackson.databind.ObjectMapper +import jakarta.servlet.http.HttpServletResponse +import org.genspectrum.lapis.controller.LapisHeaders.LAPIS_DATA_VERSION +import org.genspectrum.lapis.controller.LapisMediaType.TEXT_X_FASTA +import org.genspectrum.lapis.silo.DataVersion +import org.springframework.http.MediaType +import org.springframework.stereotype.Component +import java.nio.charset.Charset +import java.util.stream.Stream + +@Component +class SequencesStreamer( + private val dataVersion: DataVersion, + private val objectMapper: ObjectMapper, +) { + fun stream( + sequenceData: Stream, + response: HttpServletResponse, + acceptHeaders: List, + ) { + response.setHeader(LAPIS_DATA_VERSION, dataVersion.dataVersion) + + for (acceptHeader in acceptHeaders) { + if (TEXT_X_FASTA.includes(acceptHeader)) 
{ + streamFasta(response, sequenceData) + return + } + + if (MediaType.APPLICATION_JSON.includes(acceptHeader)) { + streamJson(response, sequenceData) + return + } + + if (MediaType.APPLICATION_NDJSON.includes(acceptHeader)) { + streamNdjson(response, sequenceData) + return + } + } + + streamFasta(response, sequenceData) + } + + private fun streamFasta( + response: HttpServletResponse, + sequenceData: Stream, + ) { + if (response.contentType == null) { + response.contentType = MediaType(TEXT_X_FASTA, Charset.defaultCharset()).toString() + } + + response.outputStream.writer().use { stream -> + sequenceData.filter { it.sequence != null } + .forEach { stream.appendLine(it.writeAsFasta()) } + } + } + + private fun streamJson( + response: HttpServletResponse, + sequenceData: Stream, + ) { + if (response.contentType == null) { + response.contentType = MediaType(MediaType.APPLICATION_JSON, Charset.defaultCharset()).toString() + } + + var isFirstEntry = true + response.outputStream.writer().use { stream -> + stream.append('[') + sequenceData.filter { it.sequence != null } + .forEach { + if (isFirstEntry) { + isFirstEntry = false + } else { + stream.append(',') + } + stream.append(objectMapper.writeValueAsString(it)) + } + stream.append(']') + } + } + + private fun streamNdjson( + response: HttpServletResponse, + sequenceData: Stream, + ) { + if (response.contentType == null) { + response.contentType = MediaType(MediaType.APPLICATION_NDJSON, Charset.defaultCharset()).toString() + } + + response.outputStream.writer().use { stream -> + sequenceData.filter { it.sequence != null } + .forEach { stream.appendLine(objectMapper.writeValueAsString(it)) } + } + } +} diff --git a/lapis/src/main/kotlin/org/genspectrum/lapis/response/SiloResponse.kt b/lapis/src/main/kotlin/org/genspectrum/lapis/response/SiloResponse.kt index a27c7c45..60d3cd6c 100644 --- a/lapis/src/main/kotlin/org/genspectrum/lapis/response/SiloResponse.kt +++ 
b/lapis/src/main/kotlin/org/genspectrum/lapis/response/SiloResponse.kt @@ -1,5 +1,6 @@ package org.genspectrum.lapis.response +import com.fasterxml.jackson.annotation.JsonIgnore import com.fasterxml.jackson.core.JsonGenerator import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.core.type.TypeReference @@ -11,15 +12,8 @@ import com.fasterxml.jackson.databind.SerializerProvider import com.fasterxml.jackson.databind.node.JsonNodeType import com.fasterxml.jackson.databind.node.NullNode import io.swagger.v3.oas.annotations.media.Schema -import jakarta.servlet.http.HttpServletResponse import org.genspectrum.lapis.config.DatabaseConfig -import org.genspectrum.lapis.controller.LapisHeaders.LAPIS_DATA_VERSION -import org.genspectrum.lapis.controller.LapisMediaType.TEXT_X_FASTA -import org.genspectrum.lapis.silo.DataVersion import org.springframework.boot.jackson.JsonComponent -import org.springframework.http.MediaType -import java.nio.charset.Charset -import java.util.stream.Stream const val COUNT_PROPERTY = "count" @@ -109,22 +103,10 @@ data class InsertionData( data class SequenceData( val sequenceKey: String, val sequence: String?, + @JsonIgnore + val sequenceName: String, ) { - fun writeAsString() = ">$sequenceKey\n$sequence" -} - -fun Stream.writeFastaTo( - response: HttpServletResponse, - dataVersion: DataVersion, -) { - response.setHeader(LAPIS_DATA_VERSION, dataVersion.dataVersion) - if (response.contentType == null) { - response.contentType = MediaType(TEXT_X_FASTA, Charset.defaultCharset()).toString() - } - response.outputStream.writer().use { stream -> - this.filter { it.sequence != null } - .forEach { stream.appendLine(it.writeAsString()) } - } + fun writeAsFasta() = ">$sequenceKey\n$sequence" } data class InfoData( @@ -140,7 +122,7 @@ class SequenceDataDeserializer(val databaseConfig: DatabaseConfig) : JsonDeseria val node = p.readValueAsTree() val sequenceKey = node.get(databaseConfig.schema.primaryKey).asText() - val value = 
node.fields().asSequence().first { it.key != databaseConfig.schema.primaryKey }.value + val (key, value) = node.fields().asSequence().first { it.key != databaseConfig.schema.primaryKey } val sequence = when (value.nodeType) { JsonNodeType.NULL -> null JsonNodeType.STRING -> value.asText() @@ -152,6 +134,21 @@ class SequenceDataDeserializer(val databaseConfig: DatabaseConfig) : JsonDeseria return SequenceData( sequenceKey = sequenceKey, sequence = sequence, + sequenceName = key, ) } } + +@JsonComponent +class SequenceDataSerializer(val databaseConfig: DatabaseConfig) : JsonSerializer() { + override fun serialize( + value: SequenceData, + gen: JsonGenerator, + serializers: SerializerProvider, + ) { + gen.writeStartObject() + gen.writeStringField(databaseConfig.schema.primaryKey, value.sequenceKey) + gen.writeStringField(value.sequenceName, value.sequence) + gen.writeEndObject() + } +} diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerCompressionTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerCompressionTest.kt index 0701f511..616f2642 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerCompressionTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerCompressionTest.kt @@ -6,7 +6,6 @@ import com.ninjasquad.springmockk.MockkBean import io.mockk.every import org.genspectrum.lapis.controller.LapisMediaType.TEXT_CSV_VALUE import org.genspectrum.lapis.controller.LapisMediaType.TEXT_TSV_VALUE -import org.genspectrum.lapis.controller.LapisMediaType.TEXT_X_FASTA_VALUE import org.genspectrum.lapis.controller.MockDataCollection.DataFormat.CSV import org.genspectrum.lapis.controller.MockDataCollection.DataFormat.CSV_WITHOUT_HEADERS import org.genspectrum.lapis.controller.MockDataCollection.DataFormat.NESTED_JSON @@ -214,15 +213,29 @@ class LapisControllerCompressionTest( compressionFormat = COMPRESSION_FORMAT_ZSTD, ) + listOf( - 
"${UNALIGNED_NUCLEOTIDE_SEQUENCES.pathSegment}/main", - "${ALIGNED_NUCLEOTIDE_SEQUENCES.pathSegment}/main", - "${ALIGNED_AMINO_ACID_SEQUENCES.pathSegment}/gene1", + UNALIGNED_NUCLEOTIDE_SEQUENCES to "main", + ALIGNED_NUCLEOTIDE_SEQUENCES to "main", + ALIGNED_AMINO_ACID_SEQUENCES to "gene1", ) - .flatMap { - getFastaRequests(it, COMPRESSION_FORMAT_GZIP) + getFastaRequests( - it, - COMPRESSION_FORMAT_ZSTD, - ) + .flatMap { (route, sequenceName) -> + getFastaRequests( + endpoint = "$route/$sequenceName", + sequenceName = sequenceName, + dataFormat = SequenceEndpointMockDataCollection.DataFormat.FASTA, + compressionFormat = COMPRESSION_FORMAT_GZIP, + ) + + getFastaRequests( + endpoint = "$route/$sequenceName", + sequenceName = sequenceName, + dataFormat = SequenceEndpointMockDataCollection.DataFormat.JSON, + compressionFormat = COMPRESSION_FORMAT_ZSTD, + ) + + getFastaRequests( + endpoint = "$route/$sequenceName", + sequenceName = sequenceName, + dataFormat = SequenceEndpointMockDataCollection.DataFormat.NDJSON, + compressionFormat = COMPRESSION_FORMAT_ZSTD, + ) } @JvmStatic @@ -322,51 +335,67 @@ fun getRequests( private fun getFastaRequests( endpoint: String, + sequenceName: String, + dataFormat: SequenceEndpointMockDataCollection.DataFormat, compressionFormat: String, ) = listOf( RequestScenario( - callDescription = "GET $endpoint with request parameter", - mockData = MockDataForEndpoints.fastaMockData, - request = getSample("$endpoint?country=Switzerland&compression=$compressionFormat"), + callDescription = "GET $endpoint as $dataFormat with request parameter", + mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName).expecting(dataFormat), + request = getSample("$endpoint") + .queryParam("country", "Switzerland") + .queryParam("dataFormat", dataFormat.fileFormat) + .queryParam("compression", compressionFormat), compressionFormat = compressionFormat, expectedContentType = getContentTypeForCompressionFormat(compressionFormat), expectedContentEncoding = 
null, ), RequestScenario( - callDescription = "GET $endpoint with accept header", - mockData = MockDataForEndpoints.fastaMockData, + callDescription = "GET $endpoint as $dataFormat with accept header", + mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName).expecting(dataFormat), request = getSample("$endpoint?country=Switzerland") + .accept(dataFormat.acceptHeader) .header(ACCEPT_ENCODING, compressionFormat), compressionFormat = compressionFormat, - expectedContentType = "$TEXT_X_FASTA_VALUE;charset=UTF-8", + expectedContentType = "${dataFormat.acceptHeader};charset=UTF-8", expectedContentEncoding = compressionFormat, ), RequestScenario( - callDescription = "POST JSON $endpoint with request parameter", - mockData = MockDataForEndpoints.fastaMockData, + callDescription = "POST JSON $endpoint as $dataFormat with request parameter", + mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName).expecting(dataFormat), request = postSample(endpoint) - .content("""{"country": "Switzerland", "compression": "$compressionFormat"}""") + .content( + """ + { + "country": "Switzerland", + "dataFormat": "${dataFormat.fileFormat}", + "compression": "$compressionFormat" + } + """.trimIndent(), + ) .contentType(APPLICATION_JSON), compressionFormat = compressionFormat, expectedContentType = getContentTypeForCompressionFormat(compressionFormat), expectedContentEncoding = null, ), RequestScenario( - callDescription = "POST JSON $endpoint with accept header", - mockData = MockDataForEndpoints.fastaMockData, + callDescription = "POST JSON $endpoint as $dataFormat with accept header", + mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName).expecting(dataFormat), request = postSample(endpoint) .content("""{"country": "Switzerland"}""") .contentType(APPLICATION_JSON) + .accept(dataFormat.acceptHeader) .header(ACCEPT_ENCODING, compressionFormat), compressionFormat = compressionFormat, - expectedContentType = "$TEXT_X_FASTA_VALUE;charset=UTF-8", + 
expectedContentType = "${dataFormat.acceptHeader};charset=UTF-8", expectedContentEncoding = compressionFormat, ), RequestScenario( - callDescription = "POST form url encoded $endpoint with request parameter", - mockData = MockDataForEndpoints.fastaMockData, + callDescription = "POST form url encoded $endpoint as $dataFormat with request parameter", + mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName).expecting(dataFormat), request = postSample(endpoint) .param("country", "Switzerland") + .param("dataFormat", dataFormat.fileFormat) .param("compression", compressionFormat) .contentType(APPLICATION_FORM_URLENCODED), compressionFormat = compressionFormat, @@ -374,14 +403,15 @@ private fun getFastaRequests( expectedContentEncoding = null, ), RequestScenario( - callDescription = "POST form url encoded $endpoint with accept header", - mockData = MockDataForEndpoints.fastaMockData, + callDescription = "POST form url encoded $endpoint as $dataFormat with accept header", + mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName).expecting(dataFormat), request = postSample(endpoint) .param("country", "Switzerland") .contentType(APPLICATION_FORM_URLENCODED) + .accept(dataFormat.acceptHeader) .header(ACCEPT_ENCODING, compressionFormat), compressionFormat = compressionFormat, - expectedContentType = "$TEXT_X_FASTA_VALUE;charset=UTF-8", + expectedContentType = "${dataFormat.acceptHeader};charset=UTF-8", expectedContentEncoding = compressionFormat, ), ) diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerDownloadAsFileTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerDownloadAsFileTest.kt index 01f320fd..1ef7bd1c 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerDownloadAsFileTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerDownloadAsFileTest.kt @@ -270,27 +270,72 @@ data class DownloadAsFileScenario( val 
expectedFilename = route.getExpectedFilename() if (route.servesFasta) { - return listOf( - DownloadAsFileScenario( - endpoint = "${route.pathSegment}/segmentName", - mockData = MockDataForEndpoints.fastaMockData, - requestedDataFormat = null, - expectedFilename = "$expectedFilename.fasta", - downloadFileBasename = null, - ), - DownloadAsFileScenario( - endpoint = "${route.pathSegment}/segmentName", - mockData = MockDataForEndpoints.fastaMockData, - requestedDataFormat = null, - expectedFilename = "my_sequence.fasta", - downloadFileBasename = "my_sequence", - ), - ) + return forSequenceEndpointDataFormats(route, expectedFilename) } return forDataFormats(route.pathSegment, expectedFilename) } + private fun forSequenceEndpointDataFormats( + route: SampleRoute, + expectedFilename: String, + ) = listOf( + DownloadAsFileScenario( + endpoint = "${route.pathSegment}/segmentName", + mockData = MockDataForEndpoints.sequenceEndpointMockData().expecting( + SequenceEndpointMockDataCollection.DataFormat.FASTA, + ), + requestedDataFormat = null, + expectedFilename = "$expectedFilename.fasta", + downloadFileBasename = null, + ), + DownloadAsFileScenario( + endpoint = "${route.pathSegment}/segmentName", + mockData = MockDataForEndpoints.sequenceEndpointMockData().expecting( + SequenceEndpointMockDataCollection.DataFormat.JSON, + ), + requestedDataFormat = SequenceEndpointMockDataCollection.DataFormat.JSON.fileFormat, + expectedFilename = "$expectedFilename.json", + downloadFileBasename = null, + ), + DownloadAsFileScenario( + endpoint = "${route.pathSegment}/segmentName", + mockData = MockDataForEndpoints.sequenceEndpointMockData().expecting( + SequenceEndpointMockDataCollection.DataFormat.NDJSON, + ), + requestedDataFormat = SequenceEndpointMockDataCollection.DataFormat.NDJSON.fileFormat, + expectedFilename = "$expectedFilename.ndjson", + downloadFileBasename = null, + ), + DownloadAsFileScenario( + endpoint = "${route.pathSegment}/segmentName", + mockData = 
MockDataForEndpoints.sequenceEndpointMockData().expecting( + SequenceEndpointMockDataCollection.DataFormat.FASTA, + ), + requestedDataFormat = null, + expectedFilename = "my_sequence.fasta", + downloadFileBasename = "my_sequence", + ), + DownloadAsFileScenario( + endpoint = "${route.pathSegment}/segmentName", + mockData = MockDataForEndpoints.sequenceEndpointMockData().expecting( + SequenceEndpointMockDataCollection.DataFormat.JSON, + ), + requestedDataFormat = SequenceEndpointMockDataCollection.DataFormat.JSON.fileFormat, + expectedFilename = "my_sequence.json", + downloadFileBasename = "my_sequence", + ), + DownloadAsFileScenario( + endpoint = "${route.pathSegment}/segmentName", + mockData = MockDataForEndpoints.sequenceEndpointMockData().expecting( + SequenceEndpointMockDataCollection.DataFormat.NDJSON, + ), + requestedDataFormat = SequenceEndpointMockDataCollection.DataFormat.NDJSON.fileFormat, + expectedFilename = "my_sequence.ndjson", + downloadFileBasename = "my_sequence", + ), + ) + private fun forDataFormats( endpoint: String, expectedFilename: String, @@ -381,7 +426,8 @@ data class DownloadCompressedFileScenario( compressionFormat: String, ): List { val (mockData, dataFileFormat) = if (route.servesFasta) { - MockDataForEndpoints.fastaMockData to "fasta" + MockDataForEndpoints.sequenceEndpointMockData() + .expecting(SequenceEndpointMockDataCollection.DataFormat.FASTA) to "fasta" } else { MockDataForEndpoints.getMockData(route.pathSegment).expecting(dataFormat) to dataFormat.fileFormat } diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerTest.kt index f598c08d..ab701466 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/LapisControllerTest.kt @@ -12,7 +12,6 @@ import org.genspectrum.lapis.response.AminoAcidMutationResponse import 
org.genspectrum.lapis.response.DetailsData import org.genspectrum.lapis.response.NucleotideInsertionResponse import org.genspectrum.lapis.response.NucleotideMutationResponse -import org.genspectrum.lapis.response.SequenceData import org.genspectrum.lapis.silo.DataVersion import org.genspectrum.lapis.silo.SequenceType import org.hamcrest.Matchers.containsInAnyOrder @@ -29,7 +28,6 @@ import org.springframework.test.web.servlet.MockMvc import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder import org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get import org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post -import org.springframework.test.web.servlet.result.MockMvcResultMatchers.content import org.springframework.test.web.servlet.result.MockMvcResultMatchers.header import org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath import org.springframework.test.web.servlet.result.MockMvcResultMatchers.status @@ -244,38 +242,28 @@ class LapisControllerTest( ) } - @ParameterizedTest(name = "{0} alignedAminoAcidSequences") - @MethodSource("getRequests") - fun alignedAminoAcidSequences( - testName: String, - request: (String) -> MockHttpServletRequestBuilder, - ) { + @ParameterizedTest(name = "{0}") + @MethodSource("getAlignedAminoAcidSequencesScenarios") + fun alignedAminoAcidSequences(scenario: SequenceEndpointTestScenario) { every { siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(mapOf("country" to "Switzerland")), SequenceType.ALIGNED, "geneName", ) - } returns Stream.of( - SequenceData("key1", "the sequence"), - SequenceData("key2", "the other sequence"), - SequenceData("key3", null), - ) + } returns MockDataForEndpoints + .sequenceEndpointMockData("geneName") + .sequenceData + .stream() - mockMvc.perform(request("$ALIGNED_AMINO_ACID_SEQUENCES_ROUTE/geneName")) + val responseContent = mockMvc.perform(scenario.request) .andExpect(status().isOk) 
.andExpect(header().stringValues("Lapis-Data-Version", "1234")) - .andExpect( - content().string( - """ - >key1 - the sequence - >key2 - the other sequence - - """.trimIndent(), - ), - ) + .andReturn() + .response + .contentAsString + + scenario.mockData.assertDataMatches(responseContent) } private companion object { @@ -417,6 +405,12 @@ class LapisControllerTest( }, ), ) + + @JvmStatic + val alignedAminoAcidSequencesScenarios = SequenceEndpointTestScenario.createScenarios( + route = "$ALIGNED_AMINO_ACID_SEQUENCES_ROUTE/geneName", + sequenceName = "geneName", + ) } @ParameterizedTest(name = "{0} details") diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MockData.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MockData.kt index 33cf5802..143f9b59 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MockData.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MockData.kt @@ -8,6 +8,7 @@ import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import io.mockk.every import org.genspectrum.lapis.controller.LapisMediaType.TEXT_CSV_VALUE import org.genspectrum.lapis.controller.LapisMediaType.TEXT_TSV_VALUE +import org.genspectrum.lapis.controller.LapisMediaType.TEXT_X_FASTA_VALUE import org.genspectrum.lapis.model.SiloQueryModel import org.genspectrum.lapis.response.AggregationData import org.genspectrum.lapis.response.AminoAcidInsertionResponse @@ -17,8 +18,10 @@ import org.genspectrum.lapis.response.NucleotideInsertionResponse import org.genspectrum.lapis.response.NucleotideMutationResponse import org.genspectrum.lapis.response.SequenceData import org.hamcrest.MatcherAssert.assertThat +import org.hamcrest.Matchers.hasSize import org.hamcrest.Matchers.`is` import org.springframework.http.MediaType.APPLICATION_JSON_VALUE +import org.springframework.http.MediaType.APPLICATION_NDJSON_VALUE import java.util.stream.Stream data class MockDataCollection( @@ -90,28 +93,38 @@ data class MockDataCollection( ) } -data 
class MockData( +data class SequenceEndpointMockDataCollection( + val sequenceData: List, val mockToReturnEmptyData: (SiloQueryModel) -> Unit, val mockWithData: (SiloQueryModel) -> Unit, - val assertDataMatches: (String) -> Unit, + val expectedFasta: String, + val expectedJson: String, + val expectedNdjson: String, ) { - constructor( - mockToReturnEmptyData: (SiloQueryModel) -> Unit, - mockWithData: (SiloQueryModel) -> Unit, - expectedStringContent: String, - ) : this( - mockToReturnEmptyData, - mockWithData, - { assertThat(it, `is`(expectedStringContent)) }, - ) + enum class DataFormat(val fileFormat: String, val acceptHeader: String) { + JSON("json", APPLICATION_JSON_VALUE), + NDJSON("ndjson", APPLICATION_NDJSON_VALUE), + FASTA("fasta", TEXT_X_FASTA_VALUE), + } companion object { - fun createForFastaEndpoint( + fun create( sequenceData: List, expectedFasta: String, - ) = MockData( - { modelMock -> every { modelMock.getGenomicSequence(any(), any(), any()) } returns Stream.empty() }, - { modelMock -> + expectedJson: String, + expectedNdjson: String, + ) = SequenceEndpointMockDataCollection( + sequenceData = sequenceData, + mockToReturnEmptyData = { modelMock -> + every { + modelMock.getGenomicSequence( + any(), + any(), + any(), + ) + } returns Stream.empty() + }, + mockWithData = { modelMock -> every { modelMock.getGenomicSequence( any(), @@ -120,11 +133,57 @@ data class MockData( ) } returns sequenceData.stream() }, - expectedFasta, + expectedFasta = expectedFasta, + expectedJson = expectedJson, + expectedNdjson = expectedNdjson, ) } + + fun expecting(dataFormat: DataFormat) = + MockData( + mockToReturnEmptyData = mockToReturnEmptyData, + mockWithData = mockWithData, + assertDataMatches = when (dataFormat) { + DataFormat.JSON -> { + { + val objectMapper = jacksonObjectMapper() + val actual = objectMapper.readTree(it) + val expectedData = objectMapper.readTree(expectedJson) + assertThat(actual, `is`(expectedData)) + } + } + + DataFormat.NDJSON -> { + { actual -> + 
val objectMapper = jacksonObjectMapper() + val actualLines = actual.lines().filter { it.isNotBlank() } + val expectedLines = expectedNdjson.lines().filter { it.isNotBlank() } + + assertThat(actualLines, hasSize(expectedLines.size)) + + actualLines.zip(expectedLines).forEachIndexed { index, (actualLine, expectedLine) -> + assertThat( + "Line ${index + 1} of $actual was not as expected:", + objectMapper.readTree(actualLine), + `is`(objectMapper.readTree(expectedLine)), + ) + } + } + } + + DataFormat.FASTA -> { + { assertThat(it, `is`(expectedFasta)) } + } + }, + ) } +data class MockData( + val mockToReturnEmptyData: (SiloQueryModel) -> Unit, + val mockWithData: (SiloQueryModel) -> Unit, + val assertDataMatches: (String) -> Unit, +) + object MockDataForEndpoints { fun getMockData(endpoint: String) = when (endpoint) { @@ -137,19 +196,31 @@ object MockDataForEndpoints { else -> throw IllegalArgumentException("Test issue: no mock data for endpoint $endpoint") } - val fastaMockData = MockData.createForFastaEndpoint( - sequenceData = listOf( - SequenceData("sequence1", "CAGAA"), - SequenceData("sequence2", "CAGAA"), - ), - expectedFasta = """ - >sequence1 - CAGAA - >sequence2 - CAGAA + fun sequenceEndpointMockData(sequenceName: String = "main") = + SequenceEndpointMockDataCollection.create( + sequenceData = listOf( + SequenceData(sequenceKey = "sequence1", sequence = "CAGAA", sequenceName = sequenceName), + SequenceData(sequenceKey = "sequence2", sequence = "CAGAT", sequenceName = sequenceName), + SequenceData(sequenceKey = "sequence3", sequence = null, sequenceName = sequenceName), + ), + expectedFasta = """ + >sequence1 + CAGAA + >sequence2 + CAGAT - """.trimIndent(), - ) + """.trimIndent(), + expectedJson = """ + [ + { "primaryKey": "sequence1", "$sequenceName": "CAGAA" }, + { "primaryKey": "sequence2", "$sequenceName": "CAGAT" } + ] + """.trimIndent(), + expectedNdjson = """ + { "primaryKey": "sequence1", "$sequenceName": "CAGAA" } + { "primaryKey": "sequence2", 
"$sequenceName": "CAGAT" } + """.trimIndent(), + ) private val aggregated = MockDataCollection.create( siloQueryModelMockCall = { it::getAggregated }, diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceControllerTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceControllerTest.kt index 6c516ebc..3543b5a6 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceControllerTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/MultiSegmentedSequenceControllerTest.kt @@ -23,9 +23,11 @@ import org.springframework.test.web.servlet.result.MockMvcResultMatchers.header import org.springframework.test.web.servlet.result.MockMvcResultMatchers.status import java.util.stream.Stream +private const val SEGMENT_NAME = "otherSegment" + @SpringBootTest( properties = [ - "$REFERENCE_GENOME_SEGMENTS_APPLICATION_ARG_PREFIX=someSegment,otherSegment", + "$REFERENCE_GENOME_SEGMENTS_APPLICATION_ARG_PREFIX=someSegment,$SEGMENT_NAME", "$REFERENCE_GENOME_GENES_APPLICATION_ARG_PREFIX=gene1,gene2", ], ) @@ -33,16 +35,14 @@ import java.util.stream.Stream class MultiSegmentedSequenceControllerTest( @Autowired val mockMvc: MockMvc, ) { - val returnedValue: Stream = Stream.of( - SequenceData("sequenceKey", "theSequence"), - SequenceData("sequenceKeyWithNullValue", null), - ) + val returnedValue: Stream = MockDataForEndpoints + .sequenceEndpointMockData(SEGMENT_NAME) + .sequenceData + .stream() - val expectedFasta = """ - >sequenceKey - theSequence - - """.trimIndent() + val expectedFasta = MockDataForEndpoints + .sequenceEndpointMockData(SEGMENT_NAME) + .expectedFasta @MockkBean lateinit var siloQueryModelMock: SiloQueryModel @@ -67,34 +67,35 @@ class MultiSegmentedSequenceControllerTest( siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(emptyMap()), SequenceType.ALIGNED, - "otherSegment", + SEGMENT_NAME, ) } returns returnedValue - 
mockMvc.perform(request("$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/otherSegment")) + mockMvc.perform(request("$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/$SEGMENT_NAME")) .andExpect(status().isOk) .andExpect(content().string(expectedFasta)) .andExpect(header().stringValues("Lapis-Data-Version", "1234")) } - @ParameterizedTest(name = "should {0} alignedNucleotideSequences with filter") - @MethodSource("getRequestsWithFilter") - fun `should call alignedNucleotideSequences with filter`( - description: String, - request: (String) -> MockHttpServletRequestBuilder, - ) { + @ParameterizedTest(name = "{0}") + @MethodSource("getAlignedRequestsWithFilter") + fun `should call alignedNucleotideSequences with filter`(scenario: SequenceEndpointTestScenario) { every { siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(mapOf("country" to "Switzerland")), SequenceType.ALIGNED, - "otherSegment", + SEGMENT_NAME, ) } returns returnedValue - mockMvc.perform(request("$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/otherSegment")) + val responseContent = mockMvc.perform(scenario.request) .andExpect(status().isOk) - .andExpect(content().string(expectedFasta)) .andExpect(header().stringValues("Lapis-Data-Version", "1234")) + .andReturn() + .response + .contentAsString + + scenario.mockData.assertDataMatches(responseContent) } @ParameterizedTest(name = "should not {0} alignedNucleotideSequence without segment") @@ -125,34 +126,35 @@ class MultiSegmentedSequenceControllerTest( siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(emptyMap()), SequenceType.UNALIGNED, - "otherSegment", + SEGMENT_NAME, ) } returns returnedValue - mockMvc.perform(request("$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/otherSegment")) + mockMvc.perform(request("$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/$SEGMENT_NAME")) .andExpect(status().isOk) .andExpect(content().string(expectedFasta)) .andExpect(header().stringValues("Lapis-Data-Version", "1234")) } - @ParameterizedTest(name = "should {0} unalignedNucleotideSequences with 
filter") - @MethodSource("getRequestsWithFilter") - fun `should call unalignedNucleotideSequences with filter`( - description: String, - request: (String) -> MockHttpServletRequestBuilder, - ) { + @ParameterizedTest(name = "{0}") + @MethodSource("getUnalignedRequestsWithFilter") + fun `should call unalignedNucleotideSequences with filter`(scenario: SequenceEndpointTestScenario) { every { siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(mapOf("country" to "Switzerland")), SequenceType.UNALIGNED, - "otherSegment", + SEGMENT_NAME, ) } returns returnedValue - mockMvc.perform(request("$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/otherSegment")) + val responseContent = mockMvc.perform(scenario.request) .andExpect(status().isOk) - .andExpect(content().string(expectedFasta)) .andExpect(header().stringValues("Lapis-Data-Version", "1234")) + .andReturn() + .response + .contentAsString + + scenario.mockData.assertDataMatches(responseContent) } @ParameterizedTest(name = "should not {0} unalignedNucleotideSequences without segment") @@ -175,30 +177,15 @@ class MultiSegmentedSequenceControllerTest( companion object { @JvmStatic - val requestsWithFilter = listOf( - Arguments.of( - "GET", - { route: String -> - getSample(route) - .queryParam("country", "Switzerland") - }, - ), - Arguments.of( - "POST JSON", - { route: String -> - postSample(route) - .content("""{"country": "Switzerland"}""") - .contentType(MediaType.APPLICATION_JSON) - }, - ), - Arguments.of( - "POST form encoded", - { route: String -> - postSample(route) - .param("country", "Switzerland") - .contentType(MediaType.APPLICATION_FORM_URLENCODED_VALUE) - }, - ), + val alignedRequestsWithFilter = SequenceEndpointTestScenario.createScenarios( + route = "$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/$SEGMENT_NAME", + sequenceName = SEGMENT_NAME, + ) + + @JvmStatic + val unalignedRequestsWithFilter = SequenceEndpointTestScenario.createScenarios( + route = "$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/$SEGMENT_NAME", + sequenceName = 
SEGMENT_NAME, ) @JvmStatic diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SequenceEndpointTestScenario.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SequenceEndpointTestScenario.kt new file mode 100644 index 00000000..5c0b1076 --- /dev/null +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SequenceEndpointTestScenario.kt @@ -0,0 +1,100 @@ +package org.genspectrum.lapis.controller + +import org.springframework.http.MediaType +import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder + +data class SequenceEndpointTestScenario( + val description: String, + val request: MockHttpServletRequestBuilder, + val mockData: MockData, +) { + override fun toString() = description + + companion object { + fun createScenarios( + route: String, + sequenceName: String, + ): List { + return listOf( + null, + SequenceEndpointMockDataCollection.DataFormat.FASTA, + SequenceEndpointMockDataCollection.DataFormat.JSON, + SequenceEndpointMockDataCollection.DataFormat.NDJSON, + ) + .flatMap { dataFormat -> + val dataFormatJsonSnippet = + dataFormat?.let { """, "dataFormat": "${dataFormat.fileFormat}" """ } ?: "" + val mockData = MockDataForEndpoints.sequenceEndpointMockData(sequenceName) + .expecting(dataFormat ?: SequenceEndpointMockDataCollection.DataFormat.FASTA) + + listOf( + SequenceEndpointTestScenario( + description = "GET $route with data format $dataFormat", + request = getSample(route) + .queryParam("country", "Switzerland") + .apply { + if (dataFormat != null) { + queryParam("dataFormat", dataFormat.toString()) + } + }, + mockData = mockData, + ), + SequenceEndpointTestScenario( + description = "GET $route with accept header for $dataFormat", + request = getSample(route) + .queryParam("country", "Switzerland") + .apply { + if (dataFormat != null) { + accept(dataFormat.acceptHeader) + } + }, + mockData = mockData, + ), + SequenceEndpointTestScenario( + description = "POST JSON $route with data format 
$dataFormat", + request = postSample(route) + .content("""{"country": "Switzerland" $dataFormatJsonSnippet}""") + .contentType(MediaType.APPLICATION_JSON), + mockData = mockData, + ), + SequenceEndpointTestScenario( + description = "POST JSON $route with accept header for $dataFormat", + request = postSample(route) + .content("""{"country": "Switzerland"}""") + .contentType(MediaType.APPLICATION_JSON) + .apply { + if (dataFormat != null) { + accept(dataFormat.acceptHeader) + } + }, + mockData = mockData, + ), + SequenceEndpointTestScenario( + description = "POST form encoded $route with data format $dataFormat", + request = postSample(route) + .param("country", "Switzerland") + .apply { + if (dataFormat != null) { + param("dataFormat", dataFormat.toString()) + } + } + .contentType(MediaType.APPLICATION_FORM_URLENCODED_VALUE), + mockData = mockData, + ), + SequenceEndpointTestScenario( + description = "POST form encoded $route with accept header for $dataFormat", + request = postSample(route) + .param("country", "Switzerland") + .contentType(MediaType.APPLICATION_FORM_URLENCODED_VALUE) + .apply { + if (dataFormat != null) { + accept(dataFormat.acceptHeader) + } + }, + mockData = mockData, + ), + ) + } + } + } +} diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceControllerTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceControllerTest.kt index 69f5425e..2f0eaf9b 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceControllerTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/controller/SingleSegmentedSequenceControllerTest.kt @@ -21,9 +21,11 @@ import org.springframework.test.web.servlet.result.MockMvcResultMatchers.header import org.springframework.test.web.servlet.result.MockMvcResultMatchers.status import java.util.stream.Stream +private const val SEGMENT_NAME = "otherSegment" + @SpringBootTest( properties = [ - 
"$REFERENCE_GENOME_SEGMENTS_APPLICATION_ARG_PREFIX=someSegment", + "$REFERENCE_GENOME_SEGMENTS_APPLICATION_ARG_PREFIX=$SEGMENT_NAME", "$REFERENCE_GENOME_GENES_APPLICATION_ARG_PREFIX=gene1,gene2", ], ) @@ -31,16 +33,14 @@ import java.util.stream.Stream class SingleSegmentedSequenceControllerTest( @Autowired val mockMvc: MockMvc, ) { - val returnedValue: Stream = Stream.of( - SequenceData("sequenceKey", "theSequence"), - SequenceData("sequenceKeyWithNullValue", null), - ) + val returnedValue: Stream = MockDataForEndpoints + .sequenceEndpointMockData("otherSegment") + .sequenceData + .stream() - val expectedFasta = """ - >sequenceKey - theSequence - - """.trimIndent() + val expectedFasta = MockDataForEndpoints + .sequenceEndpointMockData("otherSegment") + .expectedFasta @MockkBean lateinit var siloQueryModelMock: SiloQueryModel @@ -65,7 +65,7 @@ class SingleSegmentedSequenceControllerTest( siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(emptyMap()), SequenceType.ALIGNED, - "someSegment", + SEGMENT_NAME, ) } returns returnedValue @@ -75,24 +75,25 @@ class SingleSegmentedSequenceControllerTest( .andExpect(header().stringValues("Lapis-Data-Version", "1234")) } - @ParameterizedTest(name = "should {0} alignedNucleotideSequences with filter") - @MethodSource("org.genspectrum.lapis.controller.MultiSegmentedSequenceControllerTest#getRequestsWithFilter") - fun `should call alignedNucleotideSequences with filter`( - description: String, - request: (String) -> MockHttpServletRequestBuilder, - ) { + @ParameterizedTest(name = "{0}") + @MethodSource("getAlignedRequestsWithFilter") + fun `should call alignedNucleotideSequences with filter`(scenario: SequenceEndpointTestScenario) { every { siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(mapOf("country" to "Switzerland")), SequenceType.ALIGNED, - "someSegment", + SEGMENT_NAME, ) } returns returnedValue - mockMvc.perform(request("$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE")) + val responseContent = 
mockMvc.perform(scenario.request) .andExpect(status().isOk) - .andExpect(content().string(expectedFasta)) .andExpect(header().stringValues("Lapis-Data-Version", "1234")) + .andReturn() + .response + .contentAsString + + scenario.mockData.assertDataMatches(responseContent) } @ParameterizedTest(name = "should {0} alignedNucleotideSequences with empty filter") @@ -105,7 +106,7 @@ class SingleSegmentedSequenceControllerTest( siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(emptyMap()), SequenceType.ALIGNED, - "someSegment", + SEGMENT_NAME, ) } returns returnedValue @@ -123,7 +124,7 @@ class SingleSegmentedSequenceControllerTest( siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(emptyMap()), SequenceType.UNALIGNED, - "someSegment", + SEGMENT_NAME, ) } returns returnedValue @@ -133,24 +134,25 @@ class SingleSegmentedSequenceControllerTest( .andExpect(header().stringValues("Lapis-Data-Version", "1234")) } - @ParameterizedTest(name = "should {0} unalignedNucleotideSequence with filter") - @MethodSource("org.genspectrum.lapis.controller.MultiSegmentedSequenceControllerTest#getRequestsWithFilter") - fun `should call unalignedNucleotideSequence with filter`( - description: String, - request: (String) -> MockHttpServletRequestBuilder, - ) { + @ParameterizedTest(name = "{0}") + @MethodSource("getUnalignedRequestsWithFilter") + fun `should call unalignedNucleotideSequence with filter`(scenario: SequenceEndpointTestScenario) { every { siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(mapOf("country" to "Switzerland")), SequenceType.UNALIGNED, - "someSegment", + SEGMENT_NAME, ) } returns returnedValue - mockMvc.perform(request("$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE")) + val responseContent = mockMvc.perform(scenario.request) .andExpect(status().isOk) - .andExpect(content().string(expectedFasta)) .andExpect(header().stringValues("Lapis-Data-Version", "1234")) + .andReturn() + .response + .contentAsString + + 
scenario.mockData.assertDataMatches(responseContent) } @ParameterizedTest(name = "should {0} unalignedNucleotideSequence with empty filter") @@ -163,11 +165,25 @@ class SingleSegmentedSequenceControllerTest( siloQueryModelMock.getGenomicSequence( sequenceFiltersRequest(emptyMap()), SequenceType.ALIGNED, - "someSegment", + SEGMENT_NAME, ) } returns returnedValue mockMvc.perform(request("$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE/someSegment")) .andExpect(status().isNotFound) } + + companion object { + @JvmStatic + val alignedRequestsWithFilter = SequenceEndpointTestScenario.createScenarios( + route = "$ALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE", + sequenceName = SEGMENT_NAME, + ) + + @JvmStatic + val unalignedRequestsWithFilter = SequenceEndpointTestScenario.createScenarios( + route = "$UNALIGNED_NUCLEOTIDE_SEQUENCES_ROUTE", + sequenceName = SEGMENT_NAME, + ) + } } diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/silo/SiloClientTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/silo/SiloClientTest.kt index f9a03f35..5a41eef6 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/silo/SiloClientTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/silo/SiloClientTest.kt @@ -164,9 +164,9 @@ class SiloClientTest( assertThat( result, containsInAnyOrder( - SequenceData(sequenceKey = "key1", sequence = "ABCD"), - SequenceData(sequenceKey = "key2", sequence = "DEFG"), - SequenceData(sequenceKey = "key3", sequence = null), + SequenceData(sequenceKey = "key1", sequence = "ABCD", sequenceName = "someSequenceName"), + SequenceData(sequenceKey = "key2", sequence = "DEFG", sequenceName = "someSequenceName"), + SequenceData(sequenceKey = "key3", sequence = null, sequenceName = "someSequenceName"), ), ) }