Skip to content

Commit

Permalink
[gatsby-source-wordpress] support for excluded manufacturers and types (
Browse files Browse the repository at this point in the history
#4538)

* [gatsby-source-wordpress] support for excluded manufacturers and types

* [gatsby-source-wordpress] first take on glob exclusion matching

* [gatsby-source-wordpress] route based globbing

* [gatsby-source-wordpress] correcting excludedRoutes README

* [gatsby-source-wordpress] iterate excludedRoutes

* [gatsby-source-wordpress] adding minimatch as dependency

* [gatsby-source-wordpress] renaming concurrentRequests parameter for consistency

also added doc note for concurrentRequests and fixed bug in getPages call that prevented verbose output

* [gatsby-source-wordpress] adding better-queue as explicit dependency
  • Loading branch information
lightstrike authored and sebastienfi committed Mar 28, 2018
1 parent 10a1faf commit ce71ed3
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,6 @@ node_modules/
# IDE specific
.idea/
.vscode/
*.sw*

.serverless/
13 changes: 11 additions & 2 deletions packages/gatsby-source-wordpress/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,22 @@ plugins: [
wpcom_pass: "very-secured-password",
},
// Set verboseOutput to true to display a verbose output on `npm run develop` or `npm run build`
// It can help you debug specific API Endpoints problems
// It can help you debug specific API Endpoints problems.
verboseOutput: false,
// Search and Replace Urls across WordPress content
// Set how many pages are retrieved per API request.
perPage: 100,
// Search and Replace Urls across WordPress content.
searchAndReplaceContentUrls: {
sourceUrl: "https://source-url.com",
replacementUrl: "https://replacement-url.com",
},
// Set how many simultaneous requests are sent at once.
concurrentRequests: 10,
// Exclude specific routes using glob parameters
// See: https://github.com/isaacs/minimatch
// Example: `["/*/*/comments", "/yoast/**"]` will exclude routes ending in `comments` and
// all routes that begin with `yoast` from fetch.
excludedRoutes: ["/*/*/comments", "/yoast/**"],
},
},
];
Expand Down
2 changes: 2 additions & 0 deletions packages/gatsby-source-wordpress/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
"dependencies": {
"axios": "^0.16.1",
"babel-runtime": "^6.26.0",
"better-queue": "^3.8.6",
"bluebird": "^3.5.0",
"deep-map": "^1.5.0",
"deep-map-keys": "^1.2.0",
"gatsby-source-filesystem": "^1.5.27",
"json-stringify-safe": "^5.0.1",
"lodash": "^4.17.4",
"minimatch": "^3.0.4",
"qs": "^6.4.0"
},
"deprecated": false,
Expand Down
47 changes: 34 additions & 13 deletions packages/gatsby-source-wordpress/src/fetch.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const querystring = require(`querystring`)
const axios = require(`axios`)
const _ = require(`lodash`)
const minimatch = require(`minimatch`)
const colorized = require(`./output-color`)
const httpExceptionHandler = require(`./http-exception-handler`)
const requestInQueue = require(`./request-in-queue`)
Expand All @@ -10,16 +11,17 @@ const requestInQueue = require(`./request-in-queue`)
* site.
*/
async function fetch({
baseUrl,
_verbose,
_siteURL,
_useACF,
_hostingWPCOM,
_auth,
_perPage,
baseUrl,
_concurrentRequests,
_excludedRoutes,
typePrefix,
refactoredEntityTypes,
concurrentRequests,
}) {
// If the site is hosted on wordpress.com, the API Route differs.
// The same entity types are exposed (except for media and users, which need auth)
Expand Down Expand Up @@ -104,6 +106,7 @@ async function fetch({
_verbose,
_useACF,
_hostingWPCOM,
_excludedRoutes,
typePrefix,
refactoredEntityTypes,
})
Expand All @@ -129,7 +132,7 @@ async function fetch({
_hostingWPCOM,
_auth,
_accessToken,
concurrentRequests,
_concurrentRequests,
})
)
if (_verbose) console.log(``)
Expand Down Expand Up @@ -188,7 +191,7 @@ async function fetchData({
_hostingWPCOM,
_auth,
_accessToken,
concurrentRequests,
_concurrentRequests,
}) {
const type = route.type
const url = route.url
Expand All @@ -204,7 +207,7 @@ async function fetchData({
if (_verbose) console.time(`Fetching the ${type} took`)

let routeResponse = await getPages(
{ url, _perPage, _hostingWPCOM, _auth, _accessToken, getPages, concurrentRequests },
{ url, _perPage, _hostingWPCOM, _auth, _accessToken, _verbose, _concurrentRequests },
1
)

Expand Down Expand Up @@ -267,7 +270,7 @@ async function fetchData({
* @returns
*/
async function getPages(
{ url, _perPage, _hostingWPCOM, _auth, _accessToken, _verbose, concurrentRequests },
{ url, _perPage, _hostingWPCOM, _auth, _accessToken, _concurrentRequests, _verbose },
page = 1
) {
try {
Expand Down Expand Up @@ -319,7 +322,7 @@ async function getPages(
// We got page 1, now we want pages 2 through totalPages
const pageOptions = _.range(2, totalPages + 1).map(getPage => getOptions(getPage))

const pages = await requestInQueue(pageOptions, { concurrent: concurrentRequests })
const pages = await requestInQueue(pageOptions, { concurrent: _concurrentRequests })

const pageData = pages.map(page => page.data)
pageData.forEach(list => {
Expand Down Expand Up @@ -347,6 +350,7 @@ function getValidRoutes({
_verbose,
_useACF,
_hostingWPCOM,
_excludedRoutes,
typePrefix,
refactoredEntityTypes,
}) {
Expand All @@ -371,7 +375,20 @@ function getValidRoutes({
``,
baseUrl,
]
if (!excludedTypes.includes(entityType)) {

const routePath = getRoutePath(url, route._links.self)

if (excludedTypes.includes(entityType)) {
if (_verbose)
console.log(
colorized.out(`Invalid route.`, colorized.color.Font.FgRed)
)
} else if (_excludedRoutes.some(excludedRoute => minimatch(routePath, excludedRoute))) {
if (_verbose)
console.log(
colorized.out(`Excluded route from excludedRoutes pattern.`, colorized.color.Font.FgYellow)
)
} else {
if (_verbose)
console.log(
colorized.out(
Expand Down Expand Up @@ -409,11 +426,6 @@ function getValidRoutes({
break
}
validRoutes.push({ url: route._links.self, type: validType })
} else {
if (_verbose)
console.log(
colorized.out(`Invalid route.`, colorized.color.Font.FgRed)
)
}
} else {
if (_verbose)
Expand Down Expand Up @@ -457,6 +469,15 @@ const getRawEntityType = route =>
route._links.self.length
)

/**
 * Compute the path of an endpoint relative to the site's base URL.
 *
 * When `baseUrl` is a string, its first occurrence inside `fullUrl` is
 * stripped; if it does not occur at all, `fullUrl` comes back unchanged.
 *
 * @param {any} baseUrl The base site URL to strip from the endpoint URL
 * @param {any} fullUrl The complete endpoint URL to derive the route path from
 * @returns {string} The endpoint URL with the base URL removed
 */
const getRoutePath = (baseUrl, fullUrl) => {
  const routePath = fullUrl.replace(baseUrl, ``)
  return routePath
}

/**
* Extract the route manufacturer
*
Expand Down
8 changes: 7 additions & 1 deletion packages/gatsby-source-wordpress/src/gatsby-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ let _useACF = true
let _hostingWPCOM
let _auth
let _perPage
let _concurrentRequests
let _excludedRoutes

exports.sourceNodes = async (
{ boundActionCreators, getNode, store, cache, createNodeId },
Expand All @@ -29,6 +31,7 @@ exports.sourceNodes = async (
perPage = 100,
searchAndReplaceContentUrls = {},
concurrentRequests = 10,
excludedRoutes = [],
}
) => {
const { createNode } = boundActionCreators
Expand All @@ -38,6 +41,8 @@ exports.sourceNodes = async (
_hostingWPCOM = hostingWPCOM
_auth = auth
_perPage = perPage
_concurrentRequests = concurrentRequests
_excludedRoutes = excludedRoutes

let entities = await fetch({
baseUrl,
Expand All @@ -47,9 +52,10 @@ exports.sourceNodes = async (
_hostingWPCOM,
_auth,
_perPage,
_concurrentRequests,
_excludedRoutes,
typePrefix,
refactoredEntityTypes,
concurrentRequests,
})

// Normalize data & create nodes
Expand Down

0 comments on commit ce71ed3

Please sign in to comment.