Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change pagination to use Elasticsearch search_after instead of 'page' #228

Merged
merged 18 commits into from
Mar 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
of 200 and the updated Item
- Default sortby is now guaranteed to be stable. Previously, it was only by `properties.datetime`; now it is
by `properties.datetime`, `id`, and `collection`.
- ItemCollection results no longer have a `prev` link relation. This is a by-product of changing
pagination to use Elasticsearch's more performant `search_after` mechanism rather than `page`
- Pagination works past 10,000 items now

### Removed

Expand Down
105 changes: 45 additions & 60 deletions src/lib/api.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const { pickBy, assign } = require('lodash')
const { pickBy, assign, get: getNested } = require('lodash')
const gjv = require('geojson-validation')
const extent = require('@mapbox/extent')
const { DateTime } = require('luxon')
Expand Down Expand Up @@ -371,84 +371,70 @@ const collectionsToCatalogLinks = function (results, endpoint) {
}

const wrapResponseInFeatureCollection = function (
meta, features = [], links = []
context, features = [], links = []
) {
return {
type: 'FeatureCollection',
stac_version: process.env.STAC_VERSION || '1.0.0',
stac_extensions: [],
context: meta,
numberMatched: meta.matched,
numberReturned: meta.returned,
context,
numberMatched: context.matched,
numberReturned: context.returned,
features,
links
}
}

const buildPageLinks = function (meta, parameters, bbox, intersects, endpoint, httpMethod) {
const pageLinks = []

const dictToURI = (dict) => (
Object.keys(dict).map(
(p) => {
const buildPaginationLinks = function (limit, parameters, bbox, intersects, endpoint,
httpMethod, sortby, items) {
if (items.length) {
const dictToURI = (dict) => (
Object.keys(dict).map(
(p) => {
// const query = encodeURIComponent(dict[p])
let value = dict[p]
if (typeof value === 'object' && value !== null) {
value = JSON.stringify(value)
}
const query = encodeURIComponent(value)
if (p === 'collections') {
return `${encodeURIComponent(p)}[]=${query}`
let value = dict[p]
if (typeof value === 'object' && value !== null) {
value = JSON.stringify(value)
}
const query = encodeURIComponent(value)
if (p === 'collections') {
return `${encodeURIComponent(p)}[]=${query}`
}
return `${encodeURIComponent(p)}=${query}`
}
return `${encodeURIComponent(p)}=${query}`
}
).join('&')
)
const { matched, page, limit } = meta
const linkParams = pickBy(assign(parameters, { bbox, intersects, limit }))
).join('&')
)

const lastItem = items[items.length - 1]

const nextKeys = sortby ? Object.keys(sortby) : ['properties.datetime', 'id', 'collection']

const next = nextKeys.map((k) => getNested(lastItem, k)).join(',')

const nextParams = pickBy(assign(parameters, { bbox, intersects, limit, next }))

if ((page * limit) < matched) {
const newParams = { ...linkParams, page: page + 1 }
const link = {
rel: 'next',
title: 'Next page of results',
title: 'Next page of Items',
method: httpMethod
}
if (httpMethod === 'GET') {
const nextQueryParameters = dictToURI(newParams)
const nextQueryParameters = dictToURI(nextParams)
link.href = `${endpoint}?${nextQueryParameters}`
} else if (httpMethod === 'POST') {
link.href = endpoint
link.merge = false
link.body = newParams
link.body = nextParams
}
pageLinks.push(link)
return [link]
}
if (page > 1) {
const newParams = { ...linkParams, page: page - 1 }
const link = {
rel: 'prev',
title: 'Previous page of results',
method: httpMethod
}
if (httpMethod === 'GET') {
const prevQueryParameters = dictToURI(newParams)
link.href = `${endpoint}?${prevQueryParameters}`
} else if (httpMethod === 'POST') {
link.href = endpoint
link.merge = false
link.body = newParams
}
pageLinks.push(link)
}

return pageLinks
return []
}

const searchItems = async function (collectionId, queryParameters, backend, endpoint, httpMethod) {
logger.debug(`Query parameters: ${JSON.stringify(queryParameters)}`)
const {
page,
next,
bbox,
intersects
} = queryParameters
Expand All @@ -475,7 +461,7 @@ const searchItems = async function (collectionId, queryParameters, backend, endp
fields,
ids,
collections,
limit
next
})

let newEndpoint = `${endpoint}/search`
Expand All @@ -486,16 +472,15 @@ const searchItems = async function (collectionId, queryParameters, backend, endp

logger.debug(`Search parameters: ${JSON.stringify(searchParams)}`)

let results
let esResponse
try {
results = await backend.search(searchParams, page, limit)
esResponse = await backend.search(searchParams, limit)
} catch (error) {
if (isIndexNotFoundError(error)) {
results = {
esResponse = {
context: {
matched: 0,
returned: 0,
page,
limit
},
results: []
Expand All @@ -505,12 +490,12 @@ const searchItems = async function (collectionId, queryParameters, backend, endp
}
}

const { results: itemsResults, context: itemsMeta } = results
const pageLinks = buildPageLinks(
itemsMeta, searchParams, bbox, intersects, newEndpoint, httpMethod
const { results: responseItems, context } = esResponse
const pageLinks = buildPaginationLinks(
limit, searchParams, bbox, intersects, newEndpoint, httpMethod, sortby, responseItems
)
const items = addItemLinks(itemsResults, endpoint)
const response = wrapResponseInFeatureCollection(itemsMeta, items, pageLinks)
const items = addItemLinks(responseItems, endpoint)
const response = wrapResponseInFeatureCollection(context, items, pageLinks)
return response
}

Expand Down
40 changes: 15 additions & 25 deletions src/lib/es.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ function buildSort(parameters) {
return DEFAULT_SORTING
}

// Translate the `next` query parameter — a comma-delimited pagination token —
// into the array form Elasticsearch expects for its `search_after` body field.
// Returns undefined when no token was supplied, so the field is omitted.
function buildSearchAfter(parameters) {
  const { next } = parameters
  return next ? next.split(',') : undefined
}

function buildFieldsFilter(parameters) {
const { fields } = parameters
let _sourceIncludes = []
Expand Down Expand Up @@ -316,31 +324,23 @@ async function getCollections(page = 1, limit = 100) {
return results
}

async function constructSearchParams(parameters, page, limit) {
const { id } = parameters
async function constructSearchParams(parameters, limit) {
const { id, collections } = parameters

let body
if (id) {
body = buildIdQuery(id)
} else {
body = buildQuery(parameters)
body.sort = buildSort(parameters) // sort applied to the id query causes hang???
}

let index
// determine the right indices
if (parameters.hasOwnProperty('collections')) {
index = parameters.collections
} else {
index = '*,-*kibana*,-collections'
body.search_after = buildSearchAfter(parameters)
}

// Specifying the scroll parameter makes the total work
const searchParams = {
index,
index: collections || '*,-*kibana*,-collections',
body,
size: limit,
from: (page - 1) * limit,
track_total_hits: true
}

Expand All @@ -356,8 +356,8 @@ async function constructSearchParams(parameters, page, limit) {
return searchParams
}

async function search(parameters, page = 1, limit = 10) {
const searchParams = await constructSearchParams(parameters, page, limit)
async function search(parameters, limit = 10) {
const searchParams = await constructSearchParams(parameters, limit)
const esResponse = await esQuery({
ignore_unavailable: true,
allow_no_indices: true,
Expand All @@ -368,20 +368,10 @@ async function search(parameters, page = 1, limit = 10) {
const response = {
results,
context: {
page: Number(page),
limit: Number(limit),
matched: esResponse.body.hits.total.value,
returned: results.length
},
links: []
}
const nextlink = (((page * limit) < esResponse.body.hits.total.value) ? page + 1 : null)
if (nextlink) {
response.links.push({
title: 'next',
type: 'application/json',
href: nextlink
})
}
}
return response
}
Expand Down
51 changes: 41 additions & 10 deletions tests/system/test-api-search-get.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const test = require('ava')
const { default: got } = require('got')
const { deleteAllIndices, refreshIndices } = require('../helpers/es')
const { randomId } = require('../helpers/utils')
const ingest = require('../../src/lib/ingest')
Expand Down Expand Up @@ -34,7 +35,7 @@ test('GET /search has a content type of "application/geo+json; charset=utf-8', a
t.is(response.headers['content-type'], 'application/geo+json; charset=utf-8')
})

test('/search preserve bbox in prev and next links', async (t) => {
test('/search preserve bbox in next links', async (t) => {
const fixtureFiles = [
'catalog.json',
'collection.json',
Expand All @@ -47,22 +48,46 @@ test('/search preserve bbox in prev and next links', async (t) => {

const bbox = '-180,-90,180,90'

let response = await t.context.api.client.get('search', {

const response = await t.context.api.client.get('search', {
searchParams: new URLSearchParams({
bbox,
limit: 2,
page: 2
}) })
})
})

t.is(response.features.length, 0)
t.is(response.links.length, 1)
t.is(response.features.length, 2)
const nextLink = response.links.find((x) => x.rel === 'next')
const nextUrl = new URL(nextLink.href)
t.deepEqual(nextUrl.searchParams.get('bbox'), bbox)

t.deepEqual(nextUrl.searchParams.get('next'),
[
response.features[1].properties.datetime,
response.features[1].id,
response.features[1].collection
].join(','))

console.log(`nexturl ${nextUrl}`)

const nextResponse = await got.get(nextUrl).json()
t.is(nextResponse.features.length, 0)
t.falsy(nextResponse.links.find((x) => x.rel === 'next'))
})

const prevLink = response.links.find((x) => x.rel === 'prev')
t.deepEqual(new URL(prevLink.href).searchParams.get('bbox'), bbox)
test('/search preserve bbox and datetime in next links', async (t) => {
const fixtureFiles = [
'catalog.json',
'collection.json',
'LC80100102015050LGN00.json',
'LC80100102015082LGN00.json'
]
const items = await Promise.all(fixtureFiles.map((x) => systemTests.loadJson(x)))
await ingest.ingestItems(items, stream)
await refreshIndices()

const bbox = '-180,-90,180,90'
const datetime = '2015-02-19T00:00:00Z/2021-02-19T00:00:00Z'
response = await t.context.api.client.get('search', {
const response = await t.context.api.client.get('search', {
searchParams: new URLSearchParams({
bbox,
datetime: datetime,
Expand All @@ -75,6 +100,12 @@ test('/search preserve bbox in prev and next links', async (t) => {

const nextLink = response.links.find((x) => x.rel === 'next')
const nextUrl = new URL(nextLink.href)
t.deepEqual(nextUrl.searchParams.get('next'),
[
response.features[0].properties.datetime,
response.features[0].id,
response.features[0].collection
].join(','))
t.deepEqual(nextUrl.searchParams.get('bbox'), bbox)
t.deepEqual(nextUrl.searchParams.get('datetime'), datetime)
})
Loading