-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(assets): Use entity-tags to revalidate cached remote images #12426
Changes from 8 commits
2e2fc9d
e844a93
6955f3f
9981da8
83d007d
c3e60f3
f7a614a
2ebfd63
a806f5c
c63f48d
093dbdb
c3675ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
'astro': minor | ||
--- | ||
|
||
Improves the asset caching by allowing stale assets to be revalidated [using entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag). |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,18 +16,18 @@ import { getConfiguredImageService } from '../internal.js'; | |
import type { LocalImageService } from '../services/service.js'; | ||
import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js'; | ||
import { isESMImportedImage } from '../utils/imageKind.js'; | ||
import { type RemoteCacheEntry, loadRemoteImage } from './remote.js'; | ||
import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js'; | ||
|
||
interface GenerationDataUncached { | ||
cached: false; | ||
cached: 'miss'; | ||
weight: { | ||
before: number; | ||
after: number; | ||
}; | ||
} | ||
|
||
interface GenerationDataCached { | ||
cached: true; | ||
cached: 'revalidated' | 'hit'; | ||
} | ||
|
||
type GenerationData = GenerationDataUncached | GenerationDataCached; | ||
|
@@ -44,7 +44,11 @@ type AssetEnv = { | |
assetsFolder: AstroConfig['build']['assets']; | ||
}; | ||
|
||
type ImageData = { data: Uint8Array; expires: number }; | ||
type ImageData = { | ||
data: Uint8Array; | ||
expires: number; | ||
etag?: string | null; | ||
}; | ||
|
||
export async function prepareAssetsGenerationEnv( | ||
pipeline: BuildPipeline, | ||
|
@@ -135,9 +139,12 @@ export async function generateImagesForPath( | |
const timeEnd = performance.now(); | ||
const timeChange = getTimeStat(timeStart, timeEnd); | ||
const timeIncrease = `(+${timeChange})`; | ||
const statsText = generationData.cached | ||
? `(reused cache entry)` | ||
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`; | ||
const statsText = | ||
generationData.cached !== 'miss' | ||
? generationData.cached === 'hit' | ||
? `(reused cache entry)` | ||
: `(revalidated cache entry)` | ||
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`; | ||
const count = `(${env.count.current}/${env.count.total})`; | ||
env.logger.info( | ||
null, | ||
|
@@ -156,7 +163,7 @@ export async function generateImagesForPath( | |
const finalFolderURL = new URL('./', finalFileURL); | ||
await fs.promises.mkdir(finalFolderURL, { recursive: true }); | ||
|
||
// For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server | ||
// For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date and etag from the server | ||
const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json'); | ||
const cachedFileURL = new URL(cacheFile, env.assetsCacheDir); | ||
|
||
|
@@ -166,7 +173,7 @@ export async function generateImagesForPath( | |
await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE); | ||
|
||
return { | ||
cached: true, | ||
cached: 'hit', | ||
}; | ||
} else { | ||
const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry; | ||
|
@@ -184,11 +191,43 @@ export async function generateImagesForPath( | |
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64')); | ||
|
||
return { | ||
cached: true, | ||
cached: 'hit', | ||
}; | ||
} else { | ||
await fs.promises.unlink(cachedFileURL); | ||
} | ||
|
||
// Try to revalidate the cache | ||
if (JSONData.etag) { | ||
try { | ||
const revalidatedData = await revalidateRemoteImage( | ||
options.src as string, | ||
JSONData.etag, | ||
); | ||
|
||
if (revalidatedData.data.length) { | ||
// Image cache was stale, update original image to avoid redownload | ||
originalImage = revalidatedData; | ||
} else { | ||
revalidatedData.data = Buffer.from(JSONData.data, 'base64'); | ||
|
||
// Freshen cache on disk | ||
await writeRemoteCacheFile(cachedFileURL, revalidatedData, env); | ||
|
||
await fs.promises.writeFile(finalFileURL, revalidatedData.data); | ||
return { cached: 'revalidated' }; | ||
} | ||
} catch (e) { | ||
// Reuse stale cache if revalidation fails | ||
env.logger.warn( | ||
null, | ||
`An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`, | ||
); | ||
|
||
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64')); | ||
return { cached: 'hit' }; | ||
} | ||
} | ||
|
||
await fs.promises.unlink(cachedFileURL); | ||
} | ||
} catch (e: any) { | ||
if (e.code !== 'ENOENT') { | ||
|
@@ -209,6 +248,7 @@ export async function generateImagesForPath( | |
let resultData: Partial<ImageData> = { | ||
data: undefined, | ||
expires: originalImage.expires, | ||
etag: originalImage.etag, | ||
}; | ||
|
||
const imageService = (await getConfiguredImageService()) as LocalImageService; | ||
|
@@ -239,13 +279,7 @@ export async function generateImagesForPath( | |
if (isLocalImage) { | ||
await fs.promises.writeFile(cachedFileURL, resultData.data); | ||
} else { | ||
await fs.promises.writeFile( | ||
cachedFileURL, | ||
JSON.stringify({ | ||
data: Buffer.from(resultData.data).toString('base64'), | ||
expires: resultData.expires, | ||
}), | ||
); | ||
await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env); | ||
} | ||
} | ||
} catch (e) { | ||
|
@@ -259,7 +293,7 @@ export async function generateImagesForPath( | |
} | ||
|
||
return { | ||
cached: false, | ||
cached: 'miss', | ||
weight: { | ||
// Divide by 1024 to get size in kilobytes | ||
before: Math.trunc(originalImage.data.byteLength / 1024), | ||
|
@@ -269,6 +303,24 @@ export async function generateImagesForPath( | |
} | ||
} | ||
|
||
async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) { | ||
try { | ||
return await fs.promises.writeFile( | ||
cachedFileURL, | ||
JSON.stringify({ | ||
data: Buffer.from(resultData.data).toString('base64'), | ||
expires: resultData.expires, | ||
etag: resultData.etag, | ||
}), | ||
); | ||
} catch (e) { | ||
env.logger.warn( | ||
null, | ||
`An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. Error: ${e}`, | ||
); | ||
} | ||
} | ||
|
||
export function getStaticImageList(): AssetsGlobalStaticImagesList { | ||
if (!globalThis?.astroAsset?.staticImages) { | ||
return new Map(); | ||
|
@@ -279,11 +331,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList { | |
|
||
async function loadImage(path: string, env: AssetEnv): Promise<ImageData> { | ||
if (isRemotePath(path)) { | ||
const remoteImage = await loadRemoteImage(path); | ||
return { | ||
data: remoteImage.data, | ||
expires: remoteImage.expires, | ||
}; | ||
Comment on lines
-282
to
-286
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no idea what happened here, but thank you for cleaning it, ha |
||
return await loadRemoteImage(path); | ||
} | ||
|
||
return { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
import CachePolicy from 'http-cache-semantics'; | ||
|
||
export type RemoteCacheEntry = { data: string; expires: number }; | ||
export type RemoteCacheEntry = { data: string; expires: number; etag?: string }; | ||
|
||
export async function loadRemoteImage(src: string) { | ||
const req = new Request(src); | ||
|
@@ -19,6 +19,49 @@ export async function loadRemoteImage(src: string) { | |
return { | ||
data: Buffer.from(await res.arrayBuffer()), | ||
expires: Date.now() + expires, | ||
etag: res.headers.get('Etag'), | ||
}; | ||
} | ||
|
||
/** | ||
* Revalidate a cached remote asset using its entity-tag. | ||
* Uses the [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match) header to check with the remote server if the cached version of a remote asset is still up to date. | ||
* The remote server may respond that the cached asset is still up-to-date if the entity-tag matches (304 Not Modified), or respond with an updated asset (200 OK) | ||
* @param src - url to remote asset | ||
* @param etag - the stored Entity-Tag of the cached asset | ||
* @returns An ImageData object containing the asset data, a new expiry time, and the asset's etag. The data buffer will be empty if the asset was not modified. | ||
*/ | ||
export async function revalidateRemoteImage(src: string, etag: string) { | ||
oliverlynch marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const req = new Request(src, { headers: { 'If-None-Match': etag } }); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be worth also supporting |
||
const res = await fetch(req); | ||
|
||
// Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304 | ||
if (!res.ok && res.status !== 304) { | ||
throw new Error( | ||
`Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${res.status} ${res.statusText})`, | ||
); | ||
} | ||
|
||
const data = Buffer.from(await res.arrayBuffer()); | ||
|
||
if (res.ok && !data.length) { | ||
// Server did not include body but indicated cache was stale | ||
return await loadRemoteImage(src); | ||
} | ||
|
||
// calculate an expiration date based on the response's TTL | ||
const policy = new CachePolicy( | ||
webToCachePolicyRequest(req), | ||
webToCachePolicyResponse( | ||
res.ok ? res : new Response(null, { status: 200, headers: res.headers }), | ||
), // 304 responses themselves are not cachable, so just pretend to get the refreshed TTL | ||
); | ||
const expires = policy.storable() ? policy.timeToLive() : 0; | ||
|
||
return { | ||
data, | ||
expires: Date.now() + expires, | ||
etag: res.headers.get('Etag'), | ||
}; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I realise that this is what we were already doing, but it does seem a bit wasteful to be saving images as JSON-encoded base64 strings, and this could be a good time to fix it. I think it would be better to store the image in a binary, and then use a separate JSON file for metadata, probably with the same filename alongside it but with something like an appended
.json