First iteration
Gerard Lamusse committed Feb 23, 2019
1 parent 8d068e6 commit dd4a523
Showing 3 changed files with 241 additions and 60 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -59,3 +59,6 @@ typings/

# next.js build output
.next

# OSX
.DS_Store
83 changes: 23 additions & 60 deletions README.md
@@ -2,15 +2,18 @@
A gridsome plugin to index objects to Algolia

> Ported from [gatsby-plugin-algolia](https://github.com/algolia/gatsby-plugin-algolia)
> Feel free to open issues for any questions or ideas

You can specify a list of collections to index and how to transform their nodes into an array of objects. When you run `gridsome build`, the plugin will publish those objects to Algolia.

Below is an example with some data that might not be very relevant, but it will work with the default configuration of `gridsome new`.


## Install
* `yarn add gridsome-plugin-algolia`
* `npm install gridsome-plugin-algolia -S`


## Setup

First, add your Algolia credentials to a `.env` file, which you should not commit. If you track this file in version control, and especially if the site is open source, you will leak your admin API key, and anyone would be able to change anything on your Algolia index. With the config below, the file is loaded per environment, so for `gridsome build` it would typically be `.env.production`.

@@ -21,83 +24,43 @@

```
ALGOLIA_APP_ID=XXX
ALGOLIA_API_KEY=XXX
ALGOLIA_INDEX_NAME=XXX
```

## Usage

```javascript:title=gridsome-config.js

require('dotenv').config({
path: `.env.${process.env.NODE_ENV}`,
})

const collections = [
  {
    contentTypeName: 'BlogPost',
    indexName: 'posts', // Algolia index name
    itemFormatter: (item) => {
      return {
        objectID: item.id,
        title: item.title,
        slug: item.slug,
        modified: item.modified,
      };
    }, // optional
    matchFields: ['slug', 'modified'], // Array<String> required with partial updates
  },
];

module.exports = {
  plugins: [
    {
      use: `gridsome-plugin-algolia`,
      options: {
        appId: process.env.ALGOLIA_APP_ID,
        apiKey: process.env.ALGOLIA_API_KEY,
        collections,
        chunkSize: 10000, // default: 1000
        settings: {
          // optional, any index settings
        },
        enablePartialUpdates: true, // default: false
        matchFields: ['slug', 'modified'], // Array<String> default: ['modified']
      },
    },
  ],
};
```
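
The `itemFormatter` gives you full control over the record that is sent to Algolia. As a rough sketch, a formatter for a hypothetical `BlogPost` collection with a couple of extra searchable fields might look like this (the `summary` and `tags` fields are assumptions, adjust them to your schema):

```javascript
const postFormatter = (item) => ({
  objectID: item.id,       // stable, unique id so updates replace the same record
  title: item.title,
  slug: item.slug,
  summary: item.summary,   // hypothetical field, adjust to your schema
  tags: item.tags,         // hypothetical field, adjust to your schema
  modified: item.modified, // compared via matchFields to detect changes
});
```

Whatever you return, make sure `objectID` is stable across builds; the plugin rejects items without one, and partial updates match existing records by it.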

### Partial Updates

By default, all records are reindexed on every build. To index only new, changed, and deleted items, set `enablePartialUpdates` to `true` and make sure `matchFields` is correct for every collection. The fields listed in `matchFields` are compared against the records already in Algolia to decide whether an item needs reindexing; the default is a single field called `modified`, which can be a boolean or a datetime string. You can also set `matchFields` per collection to compare different fields depending on the type of objects you are indexing. On larger sites this saves a significant number of indexing operations.
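
Under the hood, the plugin fetches the existing records for each index and compares the `matchFields` values of every item. A minimal sketch of that comparison, simplified from the `index.js` below:

```javascript
// Simplified from index.js: an item is (re)indexed when it is new
// or when any of its matchFields differs from the stored record.
const needsUpdate = (item, existing, matchFields) => {
  if (!existing) return true; // not in Algolia yet
  return matchFields.some((field) => existing[field] !== item[field]);
};

needsUpdate(
  { objectID: '1', modified: '2019-02-23' },
  { objectID: '1', modified: '2019-01-01' },
  ['modified']
); // => true, so the item is sent to Algolia again
```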
215 changes: 215 additions & 0 deletions index.js
@@ -0,0 +1,215 @@
module.exports = function (api, options) {
const algoliasearch = require('algoliasearch');
const chunk = require('lodash.chunk');

  /**
   * Default item formatter: maps a node to a minimal Algolia record,
   * using the node id as the objectID.
   *
   * @param {any} item the collection node to format
   */
const defaultTransformer = (item) => {
return {
objectID: item.id,
title: item.title,
slug: item.slug,
modified: item.modified,
};
}

  /* Cache of records already fetched from Algolia, keyed by index name */
  const indexHits = {}

/**
* Fetches all items for the current index from Algolia
*
* @param {AlgoliaIndex} index eg. client.initIndex('your_index_name');
* @param {Array<String>} attributesToRetrieve eg. ['modified', 'slug']
*/
function fetchAlgoliaObjects(index, attributesToRetrieve) {
return new Promise((resolve, reject) => {
      /* Check if we haven't already fetched this index */
if (indexHits[index.indexName]) return resolve(indexHits[index.indexName])

      const browser = index.browseAll('', { attributesToRetrieve });
const hits = {};

browser.on('result', (content) => {
if (Array.isArray(content.hits)) {
content.hits.forEach(hit => {
hits[hit.objectID] = hit
})
}
});
browser.on('end', () => {
indexHits[index.indexName] = hits
resolve(hits)
});
browser.on('error', (err) => reject(err) );
});
}

  api.afterBuild(async ({ store }) => {
    const { appId, apiKey, collections, chunkSize = 1000, enablePartialUpdates = false } = options

    if (!appId || !apiKey || !Array.isArray(collections)) {
      throw new Error(`gridsome-plugin-algolia is missing the required appId, apiKey or collections option.`)
    }

    const started = Date.now()

    const client = algoliasearch(appId, apiKey)

    /* One entry per index: the Algolia index client plus the objectIDs queued for removal */
    const indexState = {}

const jobs = collections.map(async (
{ indexName, itemFormatter = defaultTransformer, contentTypeName, matchFields = ['modified'] },
cIndex
) => {
      if (!contentTypeName) throw new Error(`Algolia failed collection #${cIndex}: contentTypeName is required`);

      if (!Array.isArray(matchFields) || !matchFields.length) throw new Error(`Algolia failed collection #${cIndex}: matchFields must be a non-empty array of strings`);

/* Use to keep track of what to remove afterwards */
if (!indexState[indexName]) indexState[indexName] = {
index: client.initIndex(indexName),
toRemove: {}
}
const currentIndexState = indexState[indexName];

const { index } = currentIndexState;
/* Use temp index if main index already exists */
let useTempIndex = false
const indexToUse = await (async (_index) => {
if (!enablePartialUpdates) {
if (useTempIndex = await indexExists(_index)) {
const tmpIndex = client.initIndex(`${indexName}_tmp`);
await scopedCopyIndex(client, _index, tmpIndex);
return tmpIndex;
}
}
return _index
})(index)
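
      /*
       * When partial updates are disabled and the index already exists, the build
       * goes into `<indexName>_tmp` (seeded with the live settings, synonyms and
       * rules via scopedCopyIndex) and is moved over the live index afterwards,
       * so searches never hit a half-built index.
       */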

      console.log(`Algolia collection #${cIndex}: getting ${contentTypeName}`);

const { collection } = store.getContentType(contentTypeName)

const items = collection.data.map(itemFormatter)
      if (items.length > 0 && !items[0].objectID) {
        throw new Error(`Algolia failed collection #${cIndex}: formatted items must have an 'objectID' key`);
      }

      console.log(`Algolia collection #${cIndex}: ${items.length} items in collection`);

      /* Without partial updates, every item is considered changed */
      let hasChanged = items;
      if (enablePartialUpdates) {
        console.log(`Algolia collection #${cIndex}: starting partial updates`);

const algoliaItems = await fetchAlgoliaObjects(indexToUse, matchFields);

const results = Object.keys(algoliaItems).length
        console.log(`Algolia collection #${cIndex}: found ${results} existing items`);

if (results) {
hasChanged = items.filter(curObj => {
const {objectID} = curObj
let extObj = algoliaItems[objectID]

/* The object exists so we don't need to remove it from Algolia */
delete(algoliaItems[objectID]);
delete(currentIndexState.toRemove[objectID])

if (!extObj) return true;

return !!matchFields.find(field => extObj[field] !== curObj[field]);
});

          /* Anything left in algoliaItems no longer exists locally, queue it for removal */
          Object.keys(algoliaItems).forEach(objectID => currentIndexState.toRemove[objectID] = true)
}

        console.log(`Algolia collection #${cIndex}: partial updates [insert/update: ${hasChanged.length}, total: ${items.length}]`);
}

const chunks = chunk(hasChanged, chunkSize);

      console.log(`Algolia collection #${cIndex}: splitting into ${chunks.length} jobs`);

/* Add changed / new items */
const chunkJobs = chunks.map(async function(chunked) {
const { taskID } = await indexToUse.addObjects(chunked);
return indexToUse.waitTask(taskID);
});

await Promise.all(chunkJobs);

if (useTempIndex) {
        console.log(`Algolia collection #${cIndex}: moving temporary index to main index`);
return moveIndex(client, indexToUse, index);
}
});

try {
await Promise.all(jobs)
if (enablePartialUpdates) {
      /* Execute once per index */
      /* This allows multiple collections that write to the same index to overlap */
const cleanup = Object.keys(indexState).map(async function(indexName) {
const state = indexState[indexName];
const isRemoved = Object.keys(state.toRemove);

if (isRemoved.length) {
console.log(`Algolia: deleting ${isRemoved.length} items from ${indexName} index`);
const { taskID } = await state.index.deleteObjects(isRemoved);
return state.index.waitTask(taskID);
}
})

await Promise.all(cleanup);
}
    } catch (err) {
      throw new Error(`Algolia indexing failed: ${err.message || err}`)
    }

console.log(`Finished indexing to Algolia in ${Date.now() - started}ms`);
})
}

/**
* Copy the settings, synonyms, and rules of the source index to the target index
* @param client
* @param sourceIndex
* @param targetIndex
* @return {Promise}
*/
async function scopedCopyIndex(client, sourceIndex, targetIndex) {
const { taskID } = await client.copyIndex(
sourceIndex.indexName,
targetIndex.indexName,
['settings', 'synonyms', 'rules']
);
return targetIndex.waitTask(taskID);
}

/**
* moves the source index to the target index
* @param client
* @param sourceIndex
* @param targetIndex
* @return {Promise}
*/
async function moveIndex(client, sourceIndex, targetIndex) {
const { taskID } = await client.moveIndex(
sourceIndex.indexName,
targetIndex.indexName
);
return targetIndex.waitTask(taskID);
}

/**
 * Checks whether an Algolia index already exists and contains records
 *
 * @param index
 */
async function indexExists(index) {
try {
    const { nbHits } = await index.search('');
return nbHits > 0;
} catch (e) {
return false;
}
}
