feat: Partial updates (#27)
* Added support for Algolia Partial Updates

* Updated readme

* Fixed partial updates on existing index

* Fixed deleting issue when there are multiple queries on the same index

* bug :)

* Remove test function

* Update README.md

Co-Authored-By: u12206050 <gerard@day4.no>

* Update README.md

* Resolved suggested changes

* Fixed attributesToRetrieve

* cleanup & resolve messages

Co-authored-by: Gerard Lamusse <>
Co-authored-by: Haroen Viaene <fingebimus@me.com>
Co-authored-by: Haroen Viaene <hello@haroen.me>
3 people authored Apr 20, 2020
1 parent fcba882 commit c0b6e47
Showing 3 changed files with 185 additions and 19 deletions.
38 changes: 35 additions & 3 deletions README.md
@@ -58,6 +58,7 @@ const queries = [
settings: {
// optional, any index settings
},
matchFields: ['slug', 'modified'], // Array<String> overrides main match fields, optional
},
];

@@ -67,20 +68,51 @@ module.exports = {
resolve: `gatsby-plugin-algolia`,
options: {
appId: process.env.ALGOLIA_APP_ID,
// Careful, do not prefix this with GATSBY_, since that would make the key
// public and allow users to change the data in the index.
apiKey: process.env.ALGOLIA_API_KEY,
indexName: process.env.ALGOLIA_INDEX_NAME, // for all queries
queries,
chunkSize: 10000, // default: 1000
settings: {
// optional, any index settings
},
enablePartialUpdates: true, // default: false
matchFields: ['slug', 'modified'], // Array<String> default: ['modified']
},
},
],
};
```

The `transformer` field accepts a function and optionally you may provide an `async` function.

The index will be synchronised with the provided index name on Algolia during Gatsby's `build` step. This is not done earlier to prevent you from going over quota while developing.


## Partial Updates

By default all records are reindexed on every build. To only sync new and changed records (and remove deleted ones), include the following in the plugin options:

```js
resolve: `gatsby-plugin-algolia`,
options: {
/* ... */
enablePartialUpdates: true,
  /* (optional) Fields used to check whether an already indexed object differs from the new one */
  /* By default it compares a field called "modified", which can be a boolean or a datetime string */
matchFields: ['slug', 'modified'] // Array<String> default: ['modified']
}
```

This saves a lot of Algolia operations since you don't reindex everything on every build.
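
Under the hood the plugin only fetches the `matchFields` of the records already in Algolia and re-sends a record when one of those fields differs; records that no longer come out of your queries are deleted. A simplified sketch of that comparison (illustrative variable names, not the plugin's exact code):

```js
const matchFields = ['modified'];

// records produced by your query + transformer
const newObjects = [
  { objectID: '1', modified: '2020-04-20' },
  { objectID: '2', modified: '2020-04-19' },
];

// records already in Algolia, keyed by objectID (only matchFields are fetched)
const existingObjects = {
  '1': { objectID: '1', modified: '2020-04-20' }, // unchanged, will be skipped
  '3': { objectID: '3', modified: '2020-01-01' }, // gone from the query, will be deleted
};

const changedOrNew = newObjects.filter(newObj => {
  const existing = existingObjects[newObj.objectID];
  if (!existing) return true; // brand-new record
  return matchFields.some(field => existing[field] !== newObj[field]);
});
// changedOrNew now only contains the record with objectID '2'
```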

### Advanced

You can also specify `matchFields` per query to check for different fields based on the type of objects you are indexing.
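
For example, each query can override the plugin-level `matchFields` (the query constants, index names and fields below are placeholders for your own):

```js
const queries = [
  {
    query: postsQuery, // hypothetical query for blog posts
    transformer: ({ data }) => data.allPosts.nodes,
    indexName: 'Posts',
    matchFields: ['slug', 'modified'],
  },
  {
    query: pagesQuery, // hypothetical query for pages
    transformer: ({ data }) => data.allPages.nodes,
    indexName: 'Pages',
    matchFields: ['updated'], // compared on a different field than Posts
  },
];
```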

## Transformer

The `transformer` field accepts a function and optionally you may provide an `async` function. This is useful when you want to reshape the GraphQL result, e.g. flatten `edges.node` into a plain array of records.
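
A minimal sketch, assuming a query that returns `allMarkdownRemark.edges` (adapt the field names to your own schema):

```js
const query = `{
  allMarkdownRemark {
    edges {
      node {
        id
        frontmatter {
          title
          modified
        }
      }
    }
  }
}`;

const queries = [
  {
    query,
    // flatten edges.node into a plain array of records with an objectID
    transformer: ({ data }) =>
      data.allMarkdownRemark.edges.map(({ node }) => ({
        objectID: node.id,
        ...node.frontmatter,
      })),
  },
];
```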

# Feedback

This is the very first version of our plugin and isn't yet officially supported. Please leave all your feedback in GitHub issues 😊
3 changes: 3 additions & 0 deletions example/gatsby-config.js
@@ -11,6 +11,7 @@ const query = `{
# be inserted by Algolia automatically
# and will be less simple to update etc.
objectID: id
updated
component
path
componentChunkName
@@ -52,6 +53,8 @@ module.exports = {
indexName: process.env.ALGOLIA_INDEXNAME, // for all queries
queries,
chunkSize: 10000, // default: 1000
enablePartialUpdates: true, // default: false
matchFields: ['updated'],
},
},
],
163 changes: 147 additions & 16 deletions gatsby-node.js
@@ -7,25 +7,60 @@ const report = require('gatsby-cli/lib/reporter');
*
* @param {any} obj what to keep the same
*/
const identity = (obj) => obj;

/**
* Fetches all records for the current index from Algolia
*
* @param {AlgoliaIndex} index eg. client.initIndex('your_index_name');
* @param {Array<String>} attributesToRetrieve eg. ['modified', 'slug']
*/
function fetchAlgoliaObjects(index, attributesToRetrieve = ['modified']) {
return new Promise((resolve, reject) => {
const browser = index.browseAll('', { attributesToRetrieve });
const hits = {};

browser.on('result', (content) => {
if (Array.isArray(content.hits)) {
content.hits.forEach((hit) => {
hits[hit.objectID] = hit;
});
}
});
browser.on('end', () => resolve(hits));
browser.on('error', (err) => reject(err));
});
}

exports.onPostBuild = async function (
{ graphql },
{
appId,
apiKey,
queries,
indexName: mainIndexName,
chunkSize = 1000,
enablePartialUpdates = false,
matchFields: mainMatchFields = ['modified'],
}
) {
const activity = report.activityTimer(`index to Algolia`);
activity.start();

const client = algoliasearch(appId, apiKey);

setStatus(activity, `${queries.length} queries to index`);

const indexState = {};

const jobs = queries.map(async function doQuery(
{
indexName = mainIndexName,
query,
transformer = identity,
settings,
forwardToReplicas,
matchFields = mainMatchFields,
},
i
) {
@@ -34,27 +69,92 @@ exports.onPostBuild = async function(
`failed to index to Algolia. You did not give "query" to this query`
);
}
if (!Array.isArray(matchFields) || !matchFields.length) {
return report.panic(
`failed to index to Algolia. Argument matchFields has to be an array of strings`
);
}

const index = client.initIndex(indexName);
const tempIndex = client.initIndex(`${indexName}_tmp`);
const indexToUse = await getIndexToUse({
index,
tempIndex,
enablePartialUpdates,
});

/* Use to keep track of what to remove afterwards */
if (!indexState[indexName]) {
indexState[indexName] = {
index,
toRemove: {},
};
}
const currentIndexState = indexState[indexName];

setStatus(activity, `query ${i}: executing query`);
const result = await graphql(query);
if (result.errors) {
report.panic(`failed to index to Algolia`, result.errors);
}

const objects = await transformer(result);

if (objects.length > 0 && !objects[0].objectID) {
report.panic(
`failed to index to Algolia. Query results do not have 'objectID' key`
);
}

setStatus(
activity,
`query ${i}: graphql resulted in ${Object.keys(objects).length} records`
);

let hasChanged = objects;
let algoliaObjects = {};
if (enablePartialUpdates) {
setStatus(activity, `query ${i}: starting Partial updates`);

algoliaObjects = await fetchAlgoliaObjects(indexToUse, matchFields);

const nbMatchedRecords = Object.keys(algoliaObjects).length;
setStatus(
activity,
`query ${i}: found ${nbMatchedRecords} existing records`
);

if (nbMatchedRecords) {
hasChanged = objects.filter((curObj) => {
const ID = curObj.objectID;
let extObj = algoliaObjects[ID];

/* The object exists so we don't need to remove it from Algolia */
delete algoliaObjects[ID];
delete currentIndexState.toRemove[ID];

if (!extObj) return true;

return !!matchFields.find((field) => extObj[field] !== curObj[field]);
});

Object.keys(algoliaObjects).forEach(
(objectID) => (currentIndexState.toRemove[objectID] = true)
);
}

setStatus(
activity,
`query ${i}: Partial updates – [insert/update: ${hasChanged.length}, total: ${objects.length}]`
);
}

const chunks = chunk(hasChanged, chunkSize);

setStatus(activity, `query ${i}: splitting in ${chunks.length} jobs`);

/* Add changed / new objects */
const chunkJobs = chunks.map(async function (chunked) {
const { taskID } = await indexToUse.addObjects(chunked);
return indexToUse.waitTask(taskID);
});
@@ -69,21 +169,41 @@ exports.onPostBuild = async function(
const { replicas, ...adjustedSettings } = settings;

const { taskID } = await indexToUse.setSettings(
indexToUse === tempIndex ? adjustedSettings : settings,
extraModifiers
);

await indexToUse.waitTask(taskID);
}

if (indexToUse === tempIndex) {
setStatus(activity, `query ${i}: moving copied index to main index`);
return moveIndex(client, indexToUse, index);
}
});

try {
await Promise.all(jobs);

if (enablePartialUpdates) {
/* Execute once per index */
/* This allows multiple queries to overlap */
const cleanup = Object.keys(indexState).map(async function (indexName) {
const state = indexState[indexName];
const isRemoved = Object.keys(state.toRemove);

if (isRemoved.length) {
setStatus(
activity,
`deleting ${isRemoved.length} objects from ${indexName} index`
);
const { taskID } = await state.index.deleteObjects(isRemoved);
return state.index.waitTask(taskID);
}
});

await Promise.all(cleanup);
}
} catch (err) {
report.panic(`failed to index to Algolia`, err);
}
@@ -130,7 +250,7 @@ function indexExists(index) {
return index
.getSettings()
.then(() => true)
.catch((error) => {
if (error.statusCode !== 404) {
throw error;
}
@@ -152,3 +272,14 @@ function setStatus(activity, status) {
console.log('Algolia:', status);
}
}

async function getIndexToUse({ index, tempIndex, enablePartialUpdates }) {
if (enablePartialUpdates) {
return index;
}

const mainIndexExists = await indexExists(index);
if (mainIndexExists) {
return tempIndex;
}

return index;
}
