diff --git a/integration-tests/gatsby-pipeline/__tests__/fetch-remote-file/index.js b/integration-tests/gatsby-pipeline/__tests__/fetch-remote-file/index.js
new file mode 100644
index 0000000000000..8078622a170d4
--- /dev/null
+++ b/integration-tests/gatsby-pipeline/__tests__/fetch-remote-file/index.js
@@ -0,0 +1,66 @@
+/**
+ * We want to make sure that fetch-remote-file is working with multi workers.
+ */
+
+const execa = require(`execa`)
+const path = require(`path`)
+const glob = require(`glob`)
+const fs = require(`fs-extra`)
+const md5File = require(`md5-file`)
+const basePath = path.resolve(__dirname, `../../`)
+
+const cleanDirs = () =>
+ Promise.all([
+ fs.emptyDir(`${basePath}/public`),
+ fs.emptyDir(`${basePath}/.cache`),
+ ])
+
+describe(`fetch-remote-file`, () => {
+ beforeAll(async () => {
+ await cleanDirs()
+ await execa(`yarn`, [`build`], {
+ cwd: basePath,
+ // we want to force 1 query per worker
+ env: { NODE_ENV: `production`, GATSBY_PARALLEL_QUERY_CHUNK_SIZE: `1` },
+ })
+ }, 60 * 1000)
+
+ it("should have the correct md5", async () => {
+ expect(
+ await md5File(
+ path.join(
+ __dirname,
+ "../..",
+ "public/images/50c58a791de3c2303e62084d731799eb/photoA.jpg"
+ )
+ )
+ ).toEqual("a9e57a66a10b2d26a1999a4685d7c9ef")
+ expect(
+ await md5File(
+ path.join(
+ __dirname,
+ "../..",
+ "public/images/4910e745c3c453b8795d6ba65c79d99b/photoB.jpg"
+ )
+ )
+ ).toEqual("c305dc5c5db45cc773231a507af5116d")
+ expect(
+ await md5File(
+ path.join(
+ __dirname,
+ "../..",
+ "public/images/fb673e75e9534b3cc2d2e24085386d48/photoC.jpg"
+ )
+ )
+ ).toEqual("4ba953ba27236727d7abe7d5b8916432")
+ })
+
+ /**
+ * this is a bit of a cheeky test but we just want to make sure we're actually running on multiple workers
+ */
+ it("should have conflict between workers", async () => {
+ const files = await fs.readdir(path.join(__dirname, "../../.cache/workers"))
+
+ expect(files.length).toBeGreaterThan(1)
+ })
+})
diff --git a/integration-tests/gatsby-pipeline/gatsby-node.js b/integration-tests/gatsby-pipeline/gatsby-node.js
new file mode 100644
index 0000000000000..07cf62385916c
--- /dev/null
+++ b/integration-tests/gatsby-pipeline/gatsby-node.js
@@ -0,0 +1,69 @@
+const { fetchRemoteFile } = require("gatsby-core-utils/fetch-remote-file")
+const { slash } = require("gatsby-core-utils")
+const path = require("path")
+const fs = require("fs-extra")
+
+/** @type {import('gatsby').GatsbyNode['createSchemaCustomization']} */
+exports.createSchemaCustomization = ({ actions, schema, cache, reporter }) => {
+ actions.createTypes(
+ schema.buildObjectType({
+ name: "MyRemoteFile",
+ fields: {
+ url: "String!",
+ publicUrl: {
+ type: "String!",
+ async resolve(source) {
+ const filePath = await fetchRemoteFile({
+ name: path.basename(source.name, path.extname(source.name)),
+ ext: path.extname(source.name),
+ url: source.url,
+ directory: "./public/images",
+ })
+
+ const dir = path.join(global.__GATSBY.root, ".cache", "workers")
+ await fs.ensureDir(dir)
+ await fs.createFile(
+ `${path.join(dir, `worker-${process.env.GATSBY_WORKER_ID}`)}`
+ )
+
+          const workers = (await cache.get("workers")) ?? []
+          await cache.set("workers", [...workers, process.env.GATSBY_WORKER_ID])
+
+ return `${slash(filePath.replace(/^public/, ""))}`
+ },
+ },
+ },
+ interfaces: ["Node"],
+ })
+ )
+}
+
+/** @type {import('gatsby').GatsbyNode['sourceNodes']} */
+exports.sourceNodes = ({ actions, createNodeId, createContentDigest }) => {
+ const items = [
+ {
+ name: "photoA.jpg",
+ url: "https://images.unsplash.com/photo-1517849845537-4d257902454a?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=300&q=80",
+ },
+ {
+ name: "photoB.jpg",
+ url: "https://images.unsplash.com/photo-1552053831-71594a27632d?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=300&q=80",
+ },
+ {
+ name: "photoC.jpg",
+ url: "https://images.unsplash.com/photo-1561037404-61cd46aa615b?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=300&q=80",
+ },
+ ]
+
+ items.forEach((item, index) => {
+ actions.createNode({
+ id: createNodeId(`remote-file-${index}`),
+ name: item.name,
+ url: item.url,
+ internal: {
+ type: "MyRemoteFile",
+ contentDigest: createContentDigest(item.url),
+ },
+ })
+ })
+}
diff --git a/integration-tests/gatsby-pipeline/src/pages/fetch-remote-a.js b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-a.js
new file mode 100644
index 0000000000000..578769d050771
--- /dev/null
+++ b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-a.js
@@ -0,0 +1,31 @@
+import React from "react"
+import { graphql, Link } from "gatsby"
+
+import Layout from "../components/layout"
+import SEO from "../components/seo"
+
+const FetchRemoteA = ({ data }) => {
+ return (
+
+
+
+
+ Go back to the homepage
+
+ )
+}
+
+export default FetchRemoteA
+
+export const pageQuery = graphql`
+ {
+ allMyRemoteFile {
+ nodes {
+ url
+ publicUrl
+ }
+ }
+ }
+`
diff --git a/integration-tests/gatsby-pipeline/src/pages/fetch-remote-b.js b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-b.js
new file mode 100644
index 0000000000000..e7e86751904a4
--- /dev/null
+++ b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-b.js
@@ -0,0 +1,32 @@
+import React from "react"
+import { graphql, Link } from "gatsby"
+
+import Layout from "../components/layout"
+import SEO from "../components/seo"
+
+const FetchRemoteB = ({ data }) => {
+ return (
+
+
+
+
+
+ Go back to the homepage
+
+ )
+}
+
+export default FetchRemoteB
+
+export const pageQuery = graphql`
+ {
+ allMyRemoteFile {
+ nodes {
+ url
+ publicUrl
+ }
+ }
+ }
+`
diff --git a/integration-tests/gatsby-pipeline/src/pages/fetch-remote-c.js b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-c.js
new file mode 100644
index 0000000000000..6e3535c399090
--- /dev/null
+++ b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-c.js
@@ -0,0 +1,31 @@
+import React from "react"
+import { graphql, Link } from "gatsby"
+
+import Layout from "../components/layout"
+import SEO from "../components/seo"
+
+const FetchRemoteB = ({ data }) => {
+ return (
+
+
+
+
+ Go back to the homepage
+
+ )
+}
+
+export default FetchRemoteB
+
+export const pageQuery = graphql`
+ {
+ allMyRemoteFile {
+ nodes {
+ url
+ publicUrl
+ }
+ }
+ }
+`
diff --git a/integration-tests/gatsby-pipeline/src/pages/fetch-remote-d.js b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-d.js
new file mode 100644
index 0000000000000..88312818acc3c
--- /dev/null
+++ b/integration-tests/gatsby-pipeline/src/pages/fetch-remote-d.js
@@ -0,0 +1,31 @@
+import React from "react"
+import { graphql, Link } from "gatsby"
+
+import Layout from "../components/layout"
+import SEO from "../components/seo"
+
+const FetchRemoteB = ({ data }) => {
+ return (
+
+
+
+
+ Go back to the homepage
+
+ )
+}
+
+export default FetchRemoteB
+
+export const pageQuery = graphql`
+ {
+ allMyRemoteFile {
+ nodes {
+ url
+ publicUrl
+ }
+ }
+ }
+`
diff --git a/integration-tests/gatsby-source-wordpress/jest.config.js b/integration-tests/gatsby-source-wordpress/jest.config.js
index 9248bce5aebb8..d468a716f3da8 100644
--- a/integration-tests/gatsby-source-wordpress/jest.config.js
+++ b/integration-tests/gatsby-source-wordpress/jest.config.js
@@ -1,4 +1,7 @@
module.exports = {
testPathIgnorePatterns: [`/node_modules/`, `__tests__/fixtures`, `.cache`],
bail: true,
+ moduleNameMapper: {
+ "^gatsby-core-utils/(.*)$": `gatsby-core-utils/dist/$1`, // Workaround for https://github.com/facebook/jest/issues/9771
+ },
}
diff --git a/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js b/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js
index 42965850d2f8e..5c846355e19b3 100644
--- a/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js
+++ b/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js
@@ -2,33 +2,19 @@
import path from "path"
import zlib from "zlib"
-import os from "os"
import { rest } from "msw"
import { setupServer } from "msw/node"
import { Writable } from "stream"
import got from "got"
import fs from "fs-extra"
+import { fetchRemoteFile } from "../fetch-remote-file"
+import * as storage from "../utils/get-storage"
-jest.mock(`got`, () => {
- const realGot = jest.requireActual(`got`)
+jest.spyOn(storage, `getDatabaseDir`)
+jest.spyOn(got, `stream`)
+jest.spyOn(fs, `move`)
- return {
- ...realGot,
- default: {
- ...realGot,
- stream: jest.fn(realGot.stream),
- },
- }
-})
const gotStream = got.stream
-jest.mock(`fs-extra`, () => {
- const realFs = jest.requireActual(`fs-extra`)
-
- return {
- ...realFs,
- move: jest.fn(realFs.move),
- }
-})
const fsMove = fs.move
const urlCount = new Map()
@@ -233,31 +219,7 @@ const server = setupServer(
)
)
-function getFetchInWorkerContext(workerId) {
- let fetchRemoteInstance
- jest.isolateModules(() => {
- const send = process.send
- process.env.GATSBY_WORKER_ID = workerId
- process.send = jest.fn()
- process.env.GATSBY_WORKER_MODULE_PATH = `123`
-
- fetchRemoteInstance = require(`../fetch-remote-file`).fetchRemoteFile
-
- delete process.env.GATSBY_WORKER_MODULE_PATH
- delete process.env.GATSBY_WORKER_ID
- process.send = send
- })
-
- return fetchRemoteInstance
-}
-
-async function createMockCache() {
- const tmpDir = fs.mkdtempSync(
- path.join(os.tmpdir(), `gatsby-source-filesystem-`)
- )
-
- fs.ensureDir(tmpDir)
-
+async function createMockCache(tmpDir) {
return {
get: jest.fn(() => Promise.resolve(null)),
set: jest.fn(() => Promise.resolve(null)),
@@ -267,21 +229,21 @@ async function createMockCache() {
describe(`fetch-remote-file`, () => {
let cache
- let fetchRemoteFile
+ const cachePath = path.join(__dirname, `.cache-fetch`)
beforeAll(async () => {
- cache = await createMockCache()
// Establish requests interception layer before all tests.
server.listen()
+
+ cache = await createMockCache(cachePath)
+ await fs.ensureDir(cachePath)
+ storage.getDatabaseDir.mockReturnValue(cachePath)
})
- afterAll(() => {
- if (cache) {
- try {
- fs.removeSync(cache.directory)
- } catch (err) {
- // ignore
- }
- }
+
+ afterAll(async () => {
+ await storage.closeDatabase()
+ await fs.remove(cachePath)
+ delete global.__GATSBY
// Clean up after all tests are done, preventing this
// interception layer from affecting irrelevant tests.
@@ -289,18 +251,15 @@ describe(`fetch-remote-file`, () => {
})
beforeEach(() => {
+ // simulate a new build each run
+ global.__GATSBY = {
+ buildId: global.__GATSBY?.buildId
+ ? String(Number(global.__GATSBY.buildId) + 1)
+ : `1`,
+ }
gotStream.mockClear()
fsMove.mockClear()
urlCount.clear()
-
- jest.isolateModules(() => {
- // we need to bypass the cache for each test
- fetchRemoteFile = require(`../fetch-remote-file`).fetchRemoteFile
- })
- })
-
- afterEach(() => {
- jest.useRealTimers()
})
it(`downloads and create a svg file`, async () => {
@@ -380,314 +339,24 @@ describe(`fetch-remote-file`, () => {
expect(gotStream).toBeCalledTimes(1)
})
- it(`only writes the file once when multiple workers fetch at the same time`, async () => {
- // we don't want to wait for polling to finish
- jest.useFakeTimers()
- jest.runAllTimers()
-
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
- const fetchRemoteFileInstanceTwo = getFetchInWorkerContext(`2`)
-
- const requests = [
- fetchRemoteFileInstanceOne({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- }),
- fetchRemoteFileInstanceTwo({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- }),
- ]
-
- // reverse order as last writer wins
- await requests[1]
- jest.runAllTimers()
- await requests[0]
-
- // we still expect 2 fetches because cache can't save fast enough
- expect(gotStream).toBeCalledTimes(2)
- expect(fsMove).toBeCalledTimes(1)
- })
-
- it(`it clears the mutex cache when new build id is present`, async () => {
- // we don't want to wait for polling to finish
- jest.useFakeTimers()
- jest.runAllTimers()
-
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
- const fetchRemoteFileInstanceTwo = getFetchInWorkerContext(`2`)
-
- global.__GATSBY = { buildId: `1` }
- let requests = [
- fetchRemoteFileInstanceOne({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- }),
- fetchRemoteFileInstanceTwo({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- }),
- ]
-
- // reverse order as last writer wins
- await requests[1]
- jest.runAllTimers()
- await requests[0]
- jest.runAllTimers()
-
- global.__GATSBY = { buildId: `2` }
- requests = [
- fetchRemoteFileInstanceOne({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- }),
- fetchRemoteFileInstanceTwo({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- }),
- ]
-
- // reverse order as last writer wins
- await requests[1]
- jest.runAllTimers()
- await requests[0]
-
- // we still expect 4 fetches because cache can't save fast enough
- expect(gotStream).toBeCalledTimes(4)
- expect(fsMove).toBeCalledTimes(2)
- })
-
- it(`handles 304 responses correctly in different builds`, async () => {
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- global.__GATSBY = { buildId: `1` }
+ it(`handles 304 responses correctly`, async () => {
+ const currentGlobal = global.__GATSBY
+ global.__GATSBY = { buildId: `304-1` }
const filePath = await fetchRemoteFile({
url: `http://external.com/dog-304.jpg`,
- cache: workerCache,
+ directory: cachePath,
})
- global.__GATSBY = { buildId: `2` }
+ global.__GATSBY = { buildId: `304-2` }
const filePathCached = await fetchRemoteFile({
url: `http://external.com/dog-304.jpg`,
- cache: workerCache,
- })
-
- expect(filePathCached).toBe(filePath)
- expect(fsMove).toBeCalledTimes(1)
- expect(gotStream).toBeCalledTimes(2)
- })
-
- it(`doesn't keep lock when file download failed`, async () => {
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
- const fetchRemoteFileInstanceTwo = getFetchInWorkerContext(`2`)
-
- await expect(
- fetchRemoteFileInstanceOne({
- url: `http://external.com/500.jpg`,
- cache: workerCache,
- })
- ).rejects.toThrow()
-
- await expect(
- fetchRemoteFileInstanceTwo({
- url: `http://external.com/500.jpg`,
- cache: workerCache,
- })
- ).rejects.toThrow()
-
- expect(gotStream).toBeCalledTimes(3)
- expect(fsMove).toBeCalledTimes(0)
- })
-
- it(`downloading a file in main process after downloading it in worker`, async () => {
- // we don't want to wait for polling to finish
- jest.useFakeTimers()
- jest.runAllTimers()
-
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
-
- const resultFromWorker = await fetchRemoteFileInstanceOne({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- })
-
- jest.runAllTimers()
-
- const resultFromMain = await fetchRemoteFile({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- })
-
- expect(resultFromWorker).not.toBeUndefined()
- expect(resultFromMain).not.toBeUndefined()
-
- jest.useRealTimers()
-
- expect(gotStream).toBeCalledTimes(1)
- expect(fsMove).toBeCalledTimes(1)
- })
-
- it(`downloading a file in worker process after downloading it in main`, async () => {
- // we don't want to wait for polling to finish
- jest.useFakeTimers()
- jest.runAllTimers()
-
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
-
- const resultFromMain = await fetchRemoteFile({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- })
-
- jest.runAllTimers()
-
- const resultFromWorker = await fetchRemoteFileInstanceOne({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- })
-
- jest.runAllTimers()
- jest.useRealTimers()
-
- expect(resultFromWorker).not.toBeUndefined()
- expect(resultFromMain).not.toBeUndefined()
- expect(gotStream).toBeCalledTimes(1)
- expect(fsMove).toBeCalledTimes(1)
- })
-
- it(`downloading a file in worker process after downloading it in another worker`, async () => {
- // we don't want to wait for polling to finish
- jest.useFakeTimers()
- jest.runAllTimers()
-
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
- const fetchRemoteFileInstanceTwo = getFetchInWorkerContext(`2`)
-
- const resultFromWorker1 = await fetchRemoteFileInstanceOne({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- })
- jest.runAllTimers()
-
- const resultFromWorker2 = await fetchRemoteFileInstanceTwo({
- url: `http://external.com/logo.svg`,
- cache: workerCache,
- })
-
- jest.runAllTimers()
- jest.useRealTimers()
-
- expect(resultFromWorker1).not.toBeUndefined()
- expect(resultFromWorker2).not.toBeUndefined()
- expect(gotStream).toBeCalledTimes(1)
- expect(fsMove).toBeCalledTimes(1)
- })
-
- it(`handles 304 responses correctly in different builds and workers`, async () => {
- const cacheInternals = new Map()
- const workerCache = {
- get(key) {
- return Promise.resolve(cacheInternals.get(key))
- },
- set(key, value) {
- return Promise.resolve(cacheInternals.set(key, value))
- },
- directory: cache.directory,
- }
-
- const fetchRemoteFileInstanceOne = getFetchInWorkerContext(`1`)
- const fetchRemoteFileInstanceTwo = getFetchInWorkerContext(`2`)
-
- global.__GATSBY = { buildId: `1` }
- const filePath = await fetchRemoteFileInstanceOne({
- url: `http://external.com/dog-304.jpg`,
- cache: workerCache,
- })
-
- global.__GATSBY = { buildId: `2` }
- const filePathCached = await fetchRemoteFileInstanceTwo({
- url: `http://external.com/dog-304.jpg`,
- cache: workerCache,
+ directory: cachePath,
})
expect(filePathCached).toBe(filePath)
expect(fsMove).toBeCalledTimes(1)
expect(gotStream).toBeCalledTimes(2)
+ global.__GATSBY = currentGlobal
})
it(`fails when 404 is triggered`, async () => {
diff --git a/packages/gatsby-core-utils/src/fetch-remote-file.ts b/packages/gatsby-core-utils/src/fetch-remote-file.ts
index f739bc4104e4b..966f31ccc362b 100644
--- a/packages/gatsby-core-utils/src/fetch-remote-file.ts
+++ b/packages/gatsby-core-utils/src/fetch-remote-file.ts
@@ -1,109 +1,101 @@
-import got, { Headers, Options, RequestError } from "got"
import fileType from "file-type"
import path from "path"
import fs from "fs-extra"
+import Queue from "fastq"
import { createContentDigest } from "./create-content-digest"
import {
getRemoteFileName,
getRemoteFileExtension,
createFilePath,
} from "./filename-utils"
-import type { IncomingMessage } from "http"
-import type { GatsbyCache } from "gatsby"
-import Queue from "fastq"
-import type { queue, done } from "fastq"
-
-export interface IFetchRemoteFileOptions {
- url: string
- cache: GatsbyCache
- auth?: {
- htaccess_pass?: string
- htaccess_user?: string
- }
- httpHeaders?: Headers
- ext?: string
- name?: string
- maxAttempts?: number
-}
-
-// copied from gatsby-worker
-const IS_WORKER = !!(process.send && process.env.GATSBY_WORKER_MODULE_PATH)
-const WORKER_ID = process.env.GATSBY_WORKER_ID
-
-const cacheIdForWorkers = (url: string): string => `remote-file-workers-${url}`
-const cacheIdForHeaders = (url: string): string => `remote-file-headers-${url}`
-const cacheIdForExtensions = (url: string): string =>
- `remote-file-extension-${url}`
-
-const STALL_RETRY_LIMIT = process.env.GATSBY_STALL_RETRY_LIMIT
- ? parseInt(process.env.GATSBY_STALL_RETRY_LIMIT, 10)
- : 3
-const STALL_TIMEOUT = process.env.GATSBY_STALL_TIMEOUT
- ? parseInt(process.env.GATSBY_STALL_TIMEOUT, 10)
- : 30000
-
-const CONNECTION_TIMEOUT = process.env.GATSBY_CONNECTION_TIMEOUT
- ? parseInt(process.env.GATSBY_CONNECTION_TIMEOUT, 10)
- : 30000
-
-const INCOMPLETE_RETRY_LIMIT = process.env.GATSBY_INCOMPLETE_RETRY_LIMIT
- ? parseInt(process.env.GATSBY_INCOMPLETE_RETRY_LIMIT, 10)
- : 3
-
-// jest doesn't allow us to run all timings infinitely, so we set it 0 in tests
-const BACKOFF_TIME = process.env.NODE_ENV === `test` ? 0 : 1000
-
-function range(start: number, end: number): Array {
- return Array(end - start)
- .fill(null)
- .map((_, i) => start + i)
+import { slash } from "./path"
+import { requestRemoteNode } from "./remote-file-utils/fetch-file"
+import { getStorage, getDatabaseDir } from "./utils/get-storage"
+import { createMutex } from "./mutex"
+import type { Options } from "got"
+import type { IFetchRemoteFileOptions } from "./remote-file-utils/fetch-file"
+
+interface ITask {
+ args: IFetchRemoteFileOptions
}
-// Based on the defaults of https://github.com/JustinBeckwith/retry-axios
-const STATUS_CODES_TO_RETRY = [...range(100, 200), 429, ...range(500, 600)]
-const ERROR_CODES_TO_RETRY = [
- `ETIMEDOUT`,
- `ECONNRESET`,
- `EADDRINUSE`,
- `ECONNREFUSED`,
- `EPIPE`,
- `ENOTFOUND`,
- `ENETUNREACH`,
- `EAI_AGAIN`,
- `ERR_NON_2XX_3XX_RESPONSE`,
- `ERR_GOT_REQUEST_ERROR`,
-]
-
-/********************
- * Queue Management *
- ********************/
-
const GATSBY_CONCURRENT_DOWNLOAD = process.env.GATSBY_CONCURRENT_DOWNLOAD
? parseInt(process.env.GATSBY_CONCURRENT_DOWNLOAD, 10) || 0
: 50
-const q: queue = Queue(
- fetchWorker,
- GATSBY_CONCURRENT_DOWNLOAD
-)
+const alreadyCopiedFiles = new Set()
+
+export type { IFetchRemoteFileOptions }
/**
- * fetchWorker
- * --
- * Handle fetch requests that are pushed in to the Queue
+ * Downloads a remote file to disk
*/
-async function fetchWorker(
- task: IFetchRemoteFileOptions,
- cb: done
-): Promise {
- try {
- const node = await fetchFile(task)
- return void cb(null, node)
- } catch (e) {
- return void cb(e)
+export async function fetchRemoteFile(
+ args: IFetchRemoteFileOptions
+): Promise {
+  // when a cacheKey is present we can do more persistence
+ if (args.cacheKey) {
+ const storage = getStorage(getDatabaseDir())
+ const info = storage.remoteFileInfo.get(args.url)
+
+ const fileDirectory = (
+ args.cache ? args.cache.directory : args.directory
+ ) as string
+
+ if (info?.cacheKey === args.cacheKey && fileDirectory) {
+ const cachedPath = path.join(info.directory, info.path)
+ const downloadPath = path.join(fileDirectory, info.path)
+
+ if (await fs.pathExists(cachedPath)) {
+ // If the cached directory is not part of the public directory, we don't need to copy it
+ // as it won't be part of the build.
+ if (
+ !cachedPath.startsWith(
+ path.join(global.__GATSBY?.root ?? process.cwd(), `public`)
+ )
+ ) {
+ return cachedPath
+ }
+
+ // Create a mutex to do our copy - we could do a md5 hash check as well but that's also expensive
+      if (!alreadyCopiedFiles.has(downloadPath)) {
+ alreadyCopiedFiles.add(downloadPath)
+
+ const copyFileMutex = createMutex(
+ `gatsby-core-utils:copy-fetch:${downloadPath}`,
+ 200
+ )
+ await copyFileMutex.acquire()
+        const releaseMutex = () => copyFileMutex.release()
+        await fs
+          .copy(cachedPath, downloadPath, { overwrite: true })
+          .finally(releaseMutex)
+ }
+
+ return downloadPath
+ }
+ }
}
+
+ return pushTask({ args })
}
+const queue = Queue(
+ /**
+ * fetchWorker
+ * --
+ * Handle fetch requests that are pushed in to the Queue
+ */
+ async function fetchWorker(task, cb): Promise {
+ try {
+ return void cb(null, await fetchFile(task.args))
+ } catch (e) {
+ return void cb(e)
+ }
+ },
+ GATSBY_CONCURRENT_DOWNLOAD
+)
+
/**
* pushTask
* --
@@ -111,152 +103,79 @@ async function fetchWorker(
*
* Promisfy a task in queue
* @param {CreateRemoteFileNodePayload} task
- * @return {Promise