Skip to content

Commit c0b6e47

Browse files
u12206050Haroenv
andauthored
feat: Partial updates (#27)
* Added support for Algolia Partial Updates * Updated readme * Fixed partial udpates on existing index * Fixed deleting issue when there are multiple queries on the same index * bug :) * Remove test function * Update README.md Co-Authored-By: u12206050 <gerard@day4.no> * Update README.md * Resolved suggested changes * Fixed attributesToRetrieve * cleanup & resolve messages Co-authored-by: Gerard Lamusse <> Co-authored-by: Haroen Viaene <fingebimus@me.com> Co-authored-by: Haroen Viaene <hello@haroen.me>
1 parent fcba882 commit c0b6e47

File tree

3 files changed

+185
-19
lines changed

3 files changed

+185
-19
lines changed

README.md

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ const queries = [
5858
settings: {
5959
// optional, any index settings
6060
},
61+
matchFields: ['slug', 'modified'], // Array<String> overrides main match fields, optional
6162
},
6263
];
6364

@@ -67,20 +68,51 @@ module.exports = {
6768
resolve: `gatsby-plugin-algolia`,
6869
options: {
6970
appId: process.env.ALGOLIA_APP_ID,
71+
// Careful, do not prefix this with GATSBY_, since that way users can change
72+
// the data in the index.
7073
apiKey: process.env.ALGOLIA_API_KEY,
71-
indexName: process.env.ALGOLIA_INDEX_NAME, // for all queries
74+
indexName: process.env.ALGOLIA_INDEX_NAME, // for all queries
7275
queries,
7376
chunkSize: 10000, // default: 1000
77+
settings: {
78+
// optional, any index settings
79+
},
80+
enablePartialUpdates: true, // default: false
81+
matchFields: ['slug', 'modified'], // Array<String> default: ['modified']
7482
},
7583
},
7684
],
7785
};
7886
```
7987

80-
The `transformer` field accepts a function and optionally you may provide an `async` function.
81-
8288
The index will be synchronised with the provided index name on Algolia on the `build` step in Gatsby. This is not done earlier to prevent you going over quota while developing.
8389

90+
91+
## Partial Updates
92+
93+
By default all records will be reindexed on every build. To index only the new, changed and deleted records, include the following in the options of the plugin:
94+
95+
```js
96+
resolve: `gatsby-plugin-algolia`,
97+
options: {
98+
/* ... */
99+
enablePartialUpdates: true,
100+
/* (optional) Fields to use for comparing if the index object is different from the new one */
101+
/* By default it uses a field called "modified" which could be a boolean | datetime string */
102+
matchFields: ['slug', 'modified'] // Array<String> default: ['modified']
103+
}
104+
```
105+
106+
This saves a lot of Algolia operations since you don't reindex everything on every build.
107+
108+
### Advanced
109+
110+
You can also specify `matchFields` per query to check for different fields based on the type of objects you are indexing.
111+
112+
## Transformer
113+
114+
The `transformer` field accepts a function and optionally you may provide an `async` function. This is useful when you want to change e.g. "edges.node" to simply an array.
115+
84116
# Feedback
85117

86118
This is the very first version of our plugin and isn't yet officially supported. Please leave all your feedback in GitHub issues 😊

example/gatsby-config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const query = `{
1111
# be inserted by Algolia automatically
1212
# and will be less simple to update etc.
1313
objectID: id
14+
updated
1415
component
1516
path
1617
componentChunkName
@@ -52,6 +53,8 @@ module.exports = {
5253
indexName: process.env.ALGOLIA_INDEXNAME, // for all queries
5354
queries,
5455
chunkSize: 10000, // default: 1000
56+
enablePartialUpdates: true, // default: false
57+
matchFields: ['matchFields'],
5558
},
5659
},
5760
],

gatsby-node.js

Lines changed: 147 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,60 @@ const report = require('gatsby-cli/lib/reporter');
77
*
88
* @param {any} obj what to keep the same
99
*/
10-
const identity = obj => obj;
10+
const identity = (value) => {
  // Pass-through: hands back exactly what it was given.
  return value;
};
1111

12-
exports.onPostBuild = async function(
12+
/**
 * Fetches all records for the current index from Algolia and collects them
 * into a lookup keyed by `objectID`.
 *
 * @param {AlgoliaIndex} index eg. client.initIndex('your_index_name');
 * @param {Array<String>} attributesToRetrieve eg. ['modified', 'slug']
 * @returns {Promise<Object>} resolves on the browser's 'end' event with a
 *   prototype-less map of objectID -> hit; rejects with the error passed to
 *   the browser's 'error' event
 */
function fetchAlgoliaObjects(index, attributesToRetrieve = ['modified']) {
  return new Promise((resolve, reject) => {
    const browser = index.browseAll('', { attributesToRetrieve });
    // objectIDs are arbitrary strings, so the map must not have a prototype:
    // with a plain `{}` an ID of "__proto__" would replace the prototype
    // instead of being stored, and lookups of missing IDs such as
    // "constructor" would resolve to inherited members.
    const hits = Object.create(null);

    browser.on('result', (content) => {
      // Result batches without a hits array are ignored.
      if (Array.isArray(content.hits)) {
        content.hits.forEach((hit) => {
          hits[hit.objectID] = hit;
        });
      }
    });
    browser.on('end', () => resolve(hits));
    browser.on('error', (err) => reject(err));
  });
}
34+
35+
exports.onPostBuild = async function (
1336
{ graphql },
14-
{ appId, apiKey, queries, indexName: mainIndexName, chunkSize = 1000 }
37+
{
38+
appId,
39+
apiKey,
40+
queries,
41+
indexName: mainIndexName,
42+
chunkSize = 1000,
43+
enablePartialUpdates = false,
44+
matchFields: mainMatchFields = ['modified'],
45+
}
1546
) {
1647
const activity = report.activityTimer(`index to Algolia`);
1748
activity.start();
49+
1850
const client = algoliasearch(appId, apiKey);
1951

2052
setStatus(activity, `${queries.length} queries to index`);
2153

54+
const indexState = {};
55+
2256
const jobs = queries.map(async function doQuery(
2357
{
2458
indexName = mainIndexName,
2559
query,
2660
transformer = identity,
2761
settings,
2862
forwardToReplicas,
63+
matchFields = mainMatchFields,
2964
},
3065
i
3166
) {
@@ -34,27 +69,92 @@ exports.onPostBuild = async function(
3469
`failed to index to Algolia. You did not give "query" to this query`
3570
);
3671
}
72+
if (!Array.isArray(matchFields) || !matchFields.length) {
73+
return report.panic(
74+
`failed to index to Algolia. Argument matchFields has to be an array of strings`
75+
);
76+
}
77+
3778
const index = client.initIndex(indexName);
38-
const mainIndexExists = await indexExists(index);
39-
const tmpIndex = client.initIndex(`${indexName}_tmp`);
40-
const indexToUse = mainIndexExists ? tmpIndex : index;
79+
const tempIndex = client.initIndex(`${indexName}_tmp`);
80+
const indexToUse = await getIndexToUse({
81+
index,
82+
tempIndex,
83+
enablePartialUpdates,
84+
});
4185

42-
if (mainIndexExists) {
43-
setStatus(activity, `query ${i}: copying existing index`);
44-
await scopedCopyIndex(client, index, tmpIndex);
86+
/* Use to keep track of what to remove afterwards */
87+
if (!indexState[indexName]) {
88+
indexState[indexName] = {
89+
index,
90+
toRemove: {},
91+
};
4592
}
93+
const currentIndexState = indexState[indexName];
4694

4795
setStatus(activity, `query ${i}: executing query`);
4896
const result = await graphql(query);
4997
if (result.errors) {
5098
report.panic(`failed to index to Algolia`, result.errors);
5199
}
100+
52101
const objects = await transformer(result);
53-
const chunks = chunk(objects, chunkSize);
102+
103+
if (objects.length > 0 && !objects[0].objectID) {
104+
report.panic(
105+
`failed to index to Algolia. Query results do not have 'objectID' key`
106+
);
107+
}
108+
109+
setStatus(
110+
activity,
111+
`query ${i}: graphql resulted in ${Object.keys(objects).length} records`
112+
);
113+
114+
let hasChanged = objects;
115+
let algoliaObjects = {};
116+
if (enablePartialUpdates) {
117+
setStatus(activity, `query ${i}: starting Partial updates`);
118+
119+
algoliaObjects = await fetchAlgoliaObjects(indexToUse, matchFields);
120+
121+
const nbMatchedRecords = Object.keys(algoliaObjects).length;
122+
setStatus(
123+
activity,
124+
`query ${i}: found ${nbMatchedRecords} existing records`
125+
);
126+
127+
if (nbMatchedRecords) {
128+
hasChanged = objects.filter((curObj) => {
129+
const ID = curObj.objectID;
130+
let extObj = algoliaObjects[ID];
131+
132+
/* The object exists so we don't need to remove it from Algolia */
133+
delete algoliaObjects[ID];
134+
delete currentIndexState.toRemove[ID];
135+
136+
if (!extObj) return true;
137+
138+
return !!matchFields.find((field) => extObj[field] !== curObj[field]);
139+
});
140+
141+
Object.keys(algoliaObjects).forEach(
142+
({ objectID }) => (currentIndexState.toRemove[objectID] = true)
143+
);
144+
}
145+
146+
setStatus(
147+
activity,
148+
`query ${i}: Partial updates – [insert/update: ${hasChanged.length}, total: ${objects.length}]`
149+
);
150+
}
151+
152+
const chunks = chunk(hasChanged, chunkSize);
54153

55154
setStatus(activity, `query ${i}: splitting in ${chunks.length} jobs`);
56155

57-
const chunkJobs = chunks.map(async function(chunked) {
156+
/* Add changed / new objects */
157+
const chunkJobs = chunks.map(async function (chunked) {
58158
const { taskID } = await indexToUse.addObjects(chunked);
59159
return indexToUse.waitTask(taskID);
60160
});
@@ -69,21 +169,41 @@ exports.onPostBuild = async function(
69169
const { replicas, ...adjustedSettings } = settings;
70170

71171
const { taskID } = await indexToUse.setSettings(
72-
indexToUse === tmpIndex ? adjustedSettings : settings,
172+
indexToUse === tempIndex ? adjustedSettings : settings,
73173
extraModifiers
74174
);
75-
175+
76176
await indexToUse.waitTask(taskID);
77177
}
78178

79-
if (mainIndexExists) {
179+
if (indexToUse === tempIndex) {
80180
setStatus(activity, `query ${i}: moving copied index to main index`);
81-
return moveIndex(client, tmpIndex, index);
181+
return moveIndex(client, indexToUse, index);
82182
}
83183
});
84184

85185
try {
86186
await Promise.all(jobs);
187+
188+
if (enablePartialUpdates) {
189+
/* Execute once per index */
190+
/* This allows multiple queries to overlap */
191+
const cleanup = Object.keys(indexState).map(async function (indexName) {
192+
const state = indexState[indexName];
193+
const isRemoved = Object.keys(state.toRemove);
194+
195+
if (isRemoved.length) {
196+
setStatus(
197+
activity,
198+
`deleting ${isRemoved.length} objects from ${indexName} index`
199+
);
200+
const { taskID } = await state.index.deleteObjects(isRemoved);
201+
return state.index.waitTask(taskID);
202+
}
203+
});
204+
205+
await Promise.all(cleanup);
206+
}
87207
} catch (err) {
88208
report.panic(`failed to index to Algolia`, err);
89209
}
@@ -130,7 +250,7 @@ function indexExists(index) {
130250
return index
131251
.getSettings()
132252
.then(() => true)
133-
.catch(error => {
253+
.catch((error) => {
134254
if (error.statusCode !== 404) {
135255
throw error;
136256
}
@@ -152,3 +272,14 @@ function setStatus(activity, status) {
152272
console.log('Algolia:', status);
153273
}
154274
}
275+
276+
/**
 * Chooses the index that records should be written to.
 *
 * - With partial updates enabled, records are written straight to the main
 *   index.
 * - Otherwise, when the main index already exists, the temporary index is
 *   returned.
 * - When the main index does not exist yet, the main index itself is
 *   returned. Previously this case fell through and returned `undefined`.
 *
 * @param {Object} options
 * @param {AlgoliaIndex} options.index the main index
 * @param {AlgoliaIndex} options.tempIndex the temporary index
 * @param {boolean} options.enablePartialUpdates whether partial updates are on
 * @returns {Promise<AlgoliaIndex>} the index to write to
 */
async function getIndexToUse({ index, tempIndex, enablePartialUpdates }) {
  if (enablePartialUpdates) {
    return index;
  }

  const mainIndexExists = await indexExists(index);
  return mainIndexExists ? tempIndex : index;
}

0 commit comments

Comments
 (0)