Skip to content

Commit

Permalink
feat(assets): Use entity-tags to revalidate cached remote images (#12426
Browse files Browse the repository at this point in the history
)

* feat(assets): Store etag to refresh cached images without a full download

* Separate loading and revalidating functions

* Add changeset

* Updates based on requested changes

* Wording changes, use stale cache on failure to revalidate

* Add If-Modified-Since as cache revalidation method

* Update .changeset/red-poems-pay.md

Co-authored-by: Sarah Rainsberger <5098874+sarah11918@users.noreply.github.com>

---------

Co-authored-by: Matt Kane <m@mk.gg>
Co-authored-by: Sarah Rainsberger <5098874+sarah11918@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 18, 2024
1 parent ca3ff15 commit 3dc02c5
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 26 deletions.
7 changes: 7 additions & 0 deletions .changeset/red-poems-pay.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'astro': minor
---

Improves asset caching of remote images

Astro will now store [entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) and the [Last-Modified](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified) date for cached remote images and use them to revalidate the cache when it goes stale.
101 changes: 76 additions & 25 deletions packages/astro/src/assets/build/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ import { getConfiguredImageService } from '../internal.js';
import type { LocalImageService } from '../services/service.js';
import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js';
import { isESMImportedImage } from '../utils/imageKind.js';
import { type RemoteCacheEntry, loadRemoteImage } from './remote.js';
import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js';

interface GenerationDataUncached {
cached: false;
cached: 'miss';
weight: {
before: number;
after: number;
};
}

interface GenerationDataCached {
cached: true;
cached: 'revalidated' | 'hit';
}

type GenerationData = GenerationDataUncached | GenerationDataCached;
Expand All @@ -43,7 +43,12 @@ type AssetEnv = {
assetsFolder: AstroConfig['build']['assets'];
};

type ImageData = { data: Uint8Array; expires: number };
/**
 * An image held in memory during generation, together with the cache metadata
 * that gets persisted alongside remote images.
 */
type ImageData = {
/** Raw bytes of the image. */
data: Uint8Array;
/** Expiration value stored in the remote cache entry; for remote images it is taken from the loaded/revalidated response. */
expires: number;
/** Entity tag carried over from the remote response, when one was provided. */
etag?: string;
/** `Last-Modified` value carried over from the remote response, when one was provided. */
lastModified?: string;
};

export async function prepareAssetsGenerationEnv(
pipeline: BuildPipeline,
Expand Down Expand Up @@ -135,9 +140,12 @@ export async function generateImagesForPath(
const timeEnd = performance.now();
const timeChange = getTimeStat(timeStart, timeEnd);
const timeIncrease = `(+${timeChange})`;
const statsText = generationData.cached
? `(reused cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const statsText =
generationData.cached !== 'miss'
? generationData.cached === 'hit'
? `(reused cache entry)`
: `(revalidated cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const count = `(${env.count.current}/${env.count.total})`;
env.logger.info(
null,
Expand All @@ -156,7 +164,7 @@ export async function generateImagesForPath(
const finalFolderURL = new URL('./', finalFileURL);
await fs.promises.mkdir(finalFolderURL, { recursive: true });

// For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server
// For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date, etag and last-modified date from the server
const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json');
const cachedFileURL = new URL(cacheFile, env.assetsCacheDir);

Expand All @@ -166,7 +174,7 @@ export async function generateImagesForPath(
await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE);

return {
cached: true,
cached: 'hit',
};
} else {
const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry;
Expand All @@ -184,11 +192,43 @@ export async function generateImagesForPath(
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));

return {
cached: true,
cached: 'hit',
};
} else {
await fs.promises.unlink(cachedFileURL);
}

// Try to revalidate the cache
if (JSONData.etag || JSONData.lastModified) {
try {
const revalidatedData = await revalidateRemoteImage(options.src as string, {
etag: JSONData.etag,
lastModified: JSONData.lastModified,
});

if (revalidatedData.data.length) {
// Image cache was stale, update original image to avoid redownload
originalImage = revalidatedData;
} else {
revalidatedData.data = Buffer.from(JSONData.data, 'base64');

// Freshen cache on disk
await writeRemoteCacheFile(cachedFileURL, revalidatedData, env);

await fs.promises.writeFile(finalFileURL, revalidatedData.data);
return { cached: 'revalidated' };
}
} catch (e) {
// Reuse stale cache if revalidation fails
env.logger.warn(
null,
`An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`,
);

await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
return { cached: 'hit' };
}
}

await fs.promises.unlink(cachedFileURL);
}
} catch (e: any) {
if (e.code !== 'ENOENT') {
Expand All @@ -209,6 +249,8 @@ export async function generateImagesForPath(
let resultData: Partial<ImageData> = {
data: undefined,
expires: originalImage.expires,
etag: originalImage.etag,
lastModified: originalImage.lastModified,
};

const imageService = (await getConfiguredImageService()) as LocalImageService;
Expand Down Expand Up @@ -239,13 +281,7 @@ export async function generateImagesForPath(
if (isLocalImage) {
await fs.promises.writeFile(cachedFileURL, resultData.data);
} else {
await fs.promises.writeFile(
cachedFileURL,
JSON.stringify({
data: Buffer.from(resultData.data).toString('base64'),
expires: resultData.expires,
}),
);
await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env);
}
}
} catch (e) {
Expand All @@ -259,7 +295,7 @@ export async function generateImagesForPath(
}

return {
cached: false,
cached: 'miss',
weight: {
// Divide by 1024 to get size in kilobytes
before: Math.trunc(originalImage.data.byteLength / 1024),
Expand All @@ -269,6 +305,25 @@ export async function generateImagesForPath(
}
}

/**
 * Serializes a remote image and its cache metadata to a JSON cache file.
 *
 * The image bytes are stored base64-encoded next to `expires`, `etag` and
 * `lastModified`. Writing is best-effort: any failure is reported through
 * `env.logger.warn` and swallowed, so the caller carries on without a cache entry.
 *
 * @param cachedFileURL - Location of the JSON cache file to write.
 * @param resultData - Image bytes plus the metadata to persist.
 * @param env - Generation environment; only its logger is used here.
 */
async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) {
	try {
		// Everything stays inside the try block so that a failure while encoding is
		// reported the same way as a failure while writing.
		const { data, expires, etag, lastModified } = resultData;
		const serializedEntry = JSON.stringify({
			data: Buffer.from(data).toString('base64'),
			expires,
			etag,
			lastModified,
		});

		return await fs.promises.writeFile(cachedFileURL, serializedEntry);
	} catch (e) {
		const message = `An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. Error: ${e}`;
		env.logger.warn(null, message);
	}
}

export function getStaticImageList(): AssetsGlobalStaticImagesList {
if (!globalThis?.astroAsset?.staticImages) {
return new Map();
Expand All @@ -279,11 +334,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList {

async function loadImage(path: string, env: AssetEnv): Promise<ImageData> {
if (isRemotePath(path)) {
const remoteImage = await loadRemoteImage(path);
return {
data: remoteImage.data,
expires: remoteImage.expires,
};
return await loadRemoteImage(path);
}

return {
Expand Down
62 changes: 61 additions & 1 deletion packages/astro/src/assets/build/remote.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import CachePolicy from 'http-cache-semantics';

export type RemoteCacheEntry = { data: string; expires: number };
/**
 * Shape of the JSON cache entry stored on disk for a remote image.
 */
export type RemoteCacheEntry = {
/** Image bytes, base64-encoded. */
data: string;
/** Expiry timestamp, computed as `Date.now()` plus the TTL derived from the response's cache policy. */
expires: number;
/** Value of the response's `Etag` header, if the server sent one. */
etag?: string;
/** Value of the response's `Last-Modified` header, if the server sent one. */
lastModified?: string;
};

export async function loadRemoteImage(src: string) {
const req = new Request(src);
Expand All @@ -19,6 +24,61 @@ export async function loadRemoteImage(src: string) {
return {
data: Buffer.from(await res.arrayBuffer()),
expires: Date.now() + expires,
etag: res.headers.get('Etag') ?? undefined,
lastModified: res.headers.get('Last-Modified') ?? undefined,
};
}

/**
 * Asks the remote server whether a cached asset is still current, using a conditional request.
 *
 * The stored entity-tag is sent as [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match)
 * and the stored modification date as [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since).
 * Three outcomes are handled:
 * - 304 Not Modified: the cached copy is still valid; the returned `data` buffer is empty.
 * - 2xx with a body: the asset changed; the returned `data` holds the new bytes.
 * - 2xx without a body: the asset is fetched again in full via `loadRemoteImage`.
 *
 * @param src - url to remote asset
 * @param revalidationData - the stored Entity-Tag and/or Last-Modified value of the cached asset
 * @returns The asset data (empty when not modified), a new expiry time, and the asset's etag and last-modified values.
 * @throws Error when the response is neither successful nor 304 Not Modified.
 */
export async function revalidateRemoteImage(
	src: string,
	revalidationData: { etag?: string; lastModified?: string },
) {
	const { etag: storedEtag, lastModified: storedLastModified } = revalidationData;

	// Only send the validators we actually have.
	const conditionalHeaders: Record<string, string> = {};
	if (storedEtag) {
		conditionalHeaders['If-None-Match'] = storedEtag;
	}
	if (storedLastModified) {
		conditionalHeaders['If-Modified-Since'] = storedLastModified;
	}

	const req = new Request(src, { headers: conditionalHeaders });
	const res = await fetch(req);

	// Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304
	const notModified = res.status === 304;
	if (!res.ok && !notModified) {
		throw new Error(
			`Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${res.status} ${res.statusText})`,
		);
	}

	const data = Buffer.from(await res.arrayBuffer());

	if (res.ok && data.length === 0) {
		// The server signalled a stale cache but sent no body, so do a full download
		return await loadRemoteImage(src);
	}

	// Work out the refreshed TTL from the response headers.
	const policyRequest = webToCachePolicyRequest(req);
	// 304 responses themselves are not cachable, so present the same headers as a 200 to get the refreshed TTL
	const ttlResponse = res.ok ? res : new Response(null, { status: 200, headers: res.headers });
	const policy = new CachePolicy(policyRequest, webToCachePolicyResponse(ttlResponse));

	let ttl = 0;
	if (policy.storable()) {
		ttl = policy.timeToLive();
	}
	const expires = Date.now() + ttl;

	// A 304 should repeat the validators of the 200 response; when it doesn't, keep the stored ones.
	// A successful response without validators clears them instead.
	const fallbackEtag = res.ok ? undefined : revalidationData.etag;
	const fallbackLastModified = res.ok ? undefined : revalidationData.lastModified;

	return {
		data,
		expires,
		etag: res.headers.get('Etag') ?? fallbackEtag,
		lastModified: res.headers.get('Last-Modified') ?? fallbackLastModified,
	};
}

Expand Down

0 comments on commit 3dc02c5

Please sign in to comment.