Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gatsby-core-utils): Add file download functions #29531

Merged
merged 7 commits into from
Feb 20, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/gatsby-core-utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"dependencies": {
"ci-info": "2.0.0",
"configstore": "^5.0.1",
"file-type": "^16.2.0",
"fs-extra": "^8.1.0",
"node-object-hash": "^2.0.0",
"proper-lockfile": "^4.1.1",
Expand Down
227 changes: 227 additions & 0 deletions packages/gatsby-core-utils/src/fetch-remote-file.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import got from "got"
import fileType from "file-type"
import path from "path"
import { IncomingMessage, OutgoingHttpHeaders } from "http"
import fs from "fs-extra"
import { createContentDigest } from "."
import {
getRemoteFileName,
getRemoteFileExtension,
createFilePath,
} from "./filename-utils"

import { GatsbyCache } from "gatsby"

/**
 * Builds the cache key under which the response headers received for `url`
 * are stored.
 */
const cacheIdForHeaders = (url: string): string => {
  const keyPrefix = `create-remote-file-node-headers-`
  return keyPrefix + url
}
/**
 * Builds the cache key under which the file extension guessed for `url`
 * is stored.
 */
const cacheIdForExtensions = (url: string): string => {
  const keyPrefix = `create-remote-file-node-extension-`
  return keyPrefix + url
}

/**
 * Parses an integer setting read from an environment variable.
 *
 * Returns `fallback` when the value is unset, empty, or does not start with a
 * base-10 integer. Without this guard a typo in the environment would turn a
 * retry limit or a timeout into `NaN`, which disables retries
 * (`attempt < NaN` is always false) and makes `setTimeout` fire immediately.
 */
const parseIntegerSetting = (
  rawValue: string | undefined,
  fallback: number
): number => {
  if (!rawValue) {
    return fallback
  }
  const parsed = parseInt(rawValue, 10)
  return Number.isNaN(parsed) ? fallback : parsed
}

// Number of attempts allowed when a download stalls
const STALL_RETRY_LIMIT = parseIntegerSetting(
  process.env.GATSBY_STALL_RETRY_LIMIT,
  3
)
// Milliseconds without progress before a download counts as stalled
const STALL_TIMEOUT = parseIntegerSetting(
  process.env.GATSBY_STALL_TIMEOUT,
  30000
)

// Passed to got as its `send` timeout
const CONNECTION_TIMEOUT = parseIntegerSetting(
  process.env.GATSBY_CONNECTION_TIMEOUT,
  30000
)

// Number of attempts allowed when a download ends before all bytes arrived
const INCOMPLETE_RETRY_LIMIT = parseIntegerSetting(
  process.env.GATSBY_INCOMPLETE_RETRY_LIMIT,
  3
)

/**
 * requestRemoteNode
 * --
 * Streams the file at `url` into `tmpFilename`, starting over when the
 * transfer stalls or ends before all bytes have arrived.
 *
 * @param url Location of the remote file
 * @param headers Request headers sent with every attempt
 * @param tmpFilename Path of the temporary file the response body is written to
 * @param httpOpts Extra options spread into the `got.stream` call
 * @param attempt 1-based number of the current attempt
 * @returns Resolves with the object emitted by the stream's `response` event
 *   once the body has been written to disk. Rejects with the stream error, or
 *   with a string message when the retry limit is reached (kept as a string
 *   for backward compatibility with existing callers).
 */
const requestRemoteNode = (
  url: got.GotUrl,
  headers: OutgoingHttpHeaders,
  tmpFilename: string,
  httpOpts: got.GotOptions<string | null> | undefined,
  attempt: number = 1
): Promise<IncomingMessage> =>
  new Promise((resolve, reject) => {
    let timeout: NodeJS.Timeout | undefined
    // True once this attempt has resolved, rejected or handed over to a retry.
    // Every listener checks it so that late events from this attempt can no
    // longer touch the temp file, which a retry may already be writing to.
    let isSettled = false
    const fsWriteStream = fs.createWriteStream(tmpFilename)

    const clearStallTimeout = (): void => {
      if (timeout) {
        clearTimeout(timeout)
        timeout = undefined
      }
    }

    // Start the next attempt, or give up when `retryLimit` has been reached
    const retryOrReject = (retryLimit: number): void => {
      if (attempt < retryLimit) {
        // Retry by calling ourself recursively
        resolve(
          requestRemoteNode(url, headers, tmpFilename, httpOpts, attempt + 1)
        )
      } else {
        reject(`Failed to download ${url} after ${retryLimit} attempts`)
      }
    }

    // Called if we stall for STALL_TIMEOUT ms without receiving any data
    const handleTimeout = (): void => {
      if (isSettled) {
        return
      }
      isSettled = true
      timeout = undefined
      // Detach and tear down the stalled response so it stops feeding the
      // write stream before the temp file is handed to the next attempt
      responseStream.unpipe(fsWriteStream)
      responseStream.destroy()
      fsWriteStream.close()
      fs.removeSync(tmpFilename)
      retryOrReject(STALL_RETRY_LIMIT)
    }

    const resetTimeout = (): void => {
      clearStallTimeout()
      timeout = setTimeout(handleTimeout, STALL_TIMEOUT)
    }

    const responseStream = got.stream(url, {
      headers,
      timeout: {
        send: CONNECTION_TIMEOUT, // https://github.com/sindresorhus/got#timeout
      },
      ...httpOpts,
    })

    let haveAllBytesBeenWritten = false
    responseStream.on(`downloadProgress`, progress => {
      if (isSettled) {
        return
      }
      // Progress means data is still arriving, so the stall timer starts over.
      // Without this, any download slower than STALL_TIMEOUT would be aborted.
      resetTimeout()
      if (progress.transferred === progress.total || progress.total === null) {
        haveAllBytesBeenWritten = true
      }
    })

    responseStream.pipe(fsWriteStream)

    // If there's a 400/500 response or other error.
    responseStream.on(`error`, error => {
      if (isSettled) {
        return
      }
      isSettled = true
      clearStallTimeout()
      // pipe() does not end the destination on error, so release it here
      fsWriteStream.close()
      fs.removeSync(tmpFilename)
      reject(error)
    })

    fsWriteStream.on(`error`, (error: Error) => {
      if (isSettled) {
        return
      }
      isSettled = true
      clearStallTimeout()
      reject(error)
    })

    responseStream.on(`response`, response => {
      if (isSettled) {
        return
      }
      resetTimeout()

      fsWriteStream.on(`finish`, () => {
        if (isSettled) {
          return
        }
        isSettled = true
        clearStallTimeout()
        fsWriteStream.close()

        // We have an incomplete download
        if (!haveAllBytesBeenWritten) {
          fs.removeSync(tmpFilename)
          retryOrReject(INCOMPLETE_RETRY_LIMIT)
          return
        }

        resolve(response)
      })
    })
  })

/**
 * Options accepted by `fetchRemoteFile`.
 */
interface IFetchRemoteFileOptions {
  /** Address of the remote file to download */
  url: string
  /**
   * Cache used for the download: its `directory` receives the files, and its
   * `get`/`set` store response headers and guessed extensions per url
   */
  cache: GatsbyCache
  /** Credentials sent as `htaccess_user:htaccess_pass` with the request */
  auth?: {
    htaccess_pass?: string
    htaccess_user?: string
  }
  /** Extra request headers */
  httpHeaders?: OutgoingHttpHeaders
  /**
   * Extension appended to the file name, including the leading dot.
   * When empty, `fetchRemoteFile` derives it from the url or the file content
   */
  ext: string
  /** File name without extension. When empty, it is derived from the url */
  name: string
}

/**
 * Downloads the file at `url` into the cache directory and returns the path of
 * the local copy.
 *
 * The response headers of a successful download are cached, and their `etag`
 * is sent as `If-None-Match` on later requests. On a 304 response nothing is
 * moved and the path computed for the earlier download is returned.
 *
 * @returns Path `<cache.directory>/<digest of url>/<name><ext>`
 * @throws Whatever `requestRemoteNode` rejects with (a stream error, or a
 *   string message when the retry limit is reached)
 */
export async function fetchRemoteFile({
  url,
  cache,
  auth = {},
  httpHeaders = {},
  ext,
  name,
}: IFetchRemoteFileOptions): Promise<string> {
  const pluginCacheDir = cache.directory
  // See if there's response headers for this url
  // from a previous request.
  const cachedHeaders = await cache.get(cacheIdForHeaders(url))

  const headers = { ...httpHeaders }
  if (cachedHeaders && cachedHeaders.etag) {
    headers[`If-None-Match`] = cachedHeaders.etag
  }

  // Add htaccess authentication if passed in. This isn't particularly
  // extensible. We should define a proper API that we validate.
  const httpOpts: got.GotOptions<string | null> = {}
  if (auth && (auth.htaccess_pass || auth.htaccess_user)) {
    // A missing half becomes an empty string instead of the text "undefined"
    httpOpts.auth = `${auth.htaccess_user || ``}:${auth.htaccess_pass || ``}`
  }

  // Create the temp and permanent file names for the url.
  const digest = createContentDigest(url)
  if (!name) {
    name = getRemoteFileName(url)
  }
  if (!ext) {
    ext = getRemoteFileExtension(url)
  }

  const tmpFilename = createFilePath(pluginCacheDir, `tmp-${digest}`, ext)

  // Fetch the file.
  const response = await requestRemoteNode(url, headers, tmpFilename, httpOpts)

  if (response.statusCode === 200) {
    // Save the response headers for future requests.
    await cache.set(cacheIdForHeaders(url), response.headers)
  }

  // If the user did not provide an extension and we couldn't get one from remote file, try and guess one
  if (ext === ``) {
    if (response.statusCode === 200) {
      // if this is fresh response - try to guess extension and cache result for future
      const filetype = await fileType.fromFile(tmpFilename)
      if (filetype) {
        ext = `.${filetype.ext}`
        await cache.set(cacheIdForExtensions(url), ext)
      }
    } else if (response.statusCode === 304) {
      // if file on server didn't change - grab cached extension.
      // Nothing may have been cached (the earlier guess can fail), in which
      // case the extension stays empty rather than becoming `undefined`,
      // which would end up as literal text in the file name.
      const cachedExt = await cache.get(cacheIdForExtensions(url))
      if (cachedExt) {
        ext = cachedExt
      }
    }
  }

  const filename = createFilePath(path.join(pluginCacheDir, digest), name, ext)
  // If the status code is 200, move the piped temp file to the real name.
  if (response.statusCode === 200) {
    await fs.move(tmpFilename, filename, { overwrite: true })
    // Else if 304, remove the empty response.
    // NOTE(review): the returned path then relies on the file from the earlier
    // download still being present in the cache directory - confirm with callers
  } else {
    await fs.remove(tmpFilename)
  }

  return filename
}
44 changes: 44 additions & 0 deletions packages/gatsby-core-utils/src/filename-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import path from "path"
import Url from "url"

/**
 * getParsedPath
 * --
 * Splits the pathname of a remote url into its path components.
 * A url without a pathname is treated as an empty path.
 *
 */
function getParsedPath(url: string): path.ParsedPath {
  const { pathname } = Url.parse(url)
  const remotePath = pathname || ``
  return path.parse(remotePath)
}

/**
 * getRemoteFileExtension
 * --
 * Returns the `ext` component of the path parsed from a remote url
 *
 */
export function getRemoteFileExtension(url: string): string {
  const { ext } = getParsedPath(url)
  return ext
}

/**
 * getRemoteFileName
 * --
 * Returns the `name` component of the path parsed from a remote url
 *
 */
export function getRemoteFileName(url: string): string {
  const { name } = getParsedPath(url)
  return name
}

/**
 * createFilePath
 * --
 * Joins `directory` with the file name formed by appending `ext`
 * directly to `filename`
 */
export function createFilePath(
  directory: string,
  filename: string,
  ext: string
): string {
  const basename = filename + ext
  return path.join(directory, basename)
}
1 change: 1 addition & 0 deletions packages/gatsby-core-utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ export { createRequireFromPath } from "./create-require-from-path"
export { getConfigStore } from "./get-config-store"
export { getGatsbyVersion } from "./get-gatsby-version"
export { getTermProgram } from "./get-term-program"
export { fetchRemoteFile } from "./fetch-remote-file"
export * from "./service-lock"
export * from "./site-metadata"
Loading