diff --git a/.changeset/hungry-cameras-trade.md b/.changeset/hungry-cameras-trade.md new file mode 100644 index 0000000..403f6f5 --- /dev/null +++ b/.changeset/hungry-cameras-trade.md @@ -0,0 +1,35 @@ +--- +'@apollo/datasource-rest': major +--- + +Instead of memoizing GET requests forever in memory, only apply de-duplication during the lifetime of the original request. Replace the `memoizeGetRequests` field with a `requestDeduplicationPolicyFor()` method to determine how de-duplication works per request. + +To restore the surprising infinite-unconditional-cache behavior of previous versions, use this implementation of `requestDeduplicationPolicyFor()` (which replaces `deduplicate-during-request-lifetime` with `deduplicate-until-invalidated`): + +```ts +override protected requestDeduplicationPolicyFor( + url: URL, + request: RequestOptions, +): RequestDeduplicationPolicy { + const cacheKey = this.cacheKeyFor(url, request); + if (request.method === 'GET') { + return { + policy: 'deduplicate-until-invalidated', + deduplicationKey: `${request.method} ${cacheKey}`, + }; + } else { + return { + policy: 'do-not-deduplicate', + invalidateDeduplicationKeys: [`GET ${cacheKey}`], + }; + } +} +``` + +To restore the behavior of `memoizeGetRequests = false`, use this implementation of `requestDeduplicationPolicyFor()`: + +```ts +protected override requestDeduplicationPolicyFor() { + return { policy: 'do-not-deduplicate' } as const; +} +``` diff --git a/README.md b/README.md index c32c50e..2443cc7 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ This package exports a ([`RESTDataSource`](https://github.com/apollographql/datasource-rest#apollo-rest-data-source)) class which is used for fetching data from a REST API and exposing it via GraphQL within Apollo Server. +RESTDataSource provides two levels of caching: an in-memory "request deduplication" feature primarily used to avoid sending the same GET request multiple times in parallel, and an "HTTP cache" which provides browser-style caching in a (potentially shared) `KeyValueCache` which observes standard HTTP caching headers. + ## Documentation View the [Apollo Server documentation for data sources](https://www.apollographql.com/docs/apollo-server/features/data-sources/) for more details. @@ -22,10 +24,7 @@ Your implementation of these methods can call on convenience methods built into const { RESTDataSource } = require('@apollo/datasource-rest'); class MoviesAPI extends RESTDataSource { - constructor() { - super(); - this.baseURL = 'https://movies-api.example.com/'; - } + override baseURL = 'https://movies-api.example.com/'; async getMovie(id) { return this.get(`movies/${encodeURIComponent(id)}`); @@ -52,10 +51,8 @@ Optional value to use for all the REST calls. If it is set in your class impleme ```js title="baseURL.js" class MoviesAPI extends RESTDataSource { - constructor() { - super(); - this.baseURL = 'https://movies-api.example.com/'; - } + override baseURL = 'https://movies-api.example.com/'; + // GET async getMovie(id) { return this.get( @@ -74,35 +71,70 @@ In practice, this means that you should usually set `this.baseURL` to the common If a resource's path starts with something that looks like an URL because it contains a colon and you want it to be added on to the full base URL after its path (so you can't pass it as `this.get('/foo:bar')`), you can pass a path starting with `./`, like `this.get('./foo:bar')`. -##### `memoizeGetRequests` -By default, `RESTDataSource` caches all outgoing GET **requests** in a separate memoized cache from the regular response cache. It makes the assumption that all responses from HTTP GET calls are cacheable by their URL. -If a request is made with the same cache key (URL by default) but with an HTTP method other than GET, the cached request is then cleared. -If you would like to disable the GET request cache, set the `memoizeGetRequests` property to `false`. You might want to do this if your API is not actually cacheable or your data changes over time. +#### Methods -```js title="memoizeGetRequests.js" -class MoviesAPI extends RESTDataSource { - constructor() { - super(); - // Defaults to true - this.memoizeGetRequests = false; - } +##### `cacheKeyFor` +By default, `RESTDatasource` uses the full request URL as a cache key when saving information about the request to the `KeyValueCache`. Override this method to remove query parameters or compute a custom cache key. - // Outgoing requests are never cached, however the response cache is still enabled - async getMovie(id) { - return this.get( - `https://movies-api.example.com/movies/${encodeURIComponent(id)}` // path - ); +For example, you could use this to use header fields or the HTTP method as part of the cache key. Even though we do validate header fields and don't serve responses from cache when they don't match, new responses overwrite old ones with different header fields. (For the HTTP method, this might be a positive thing, as you may want a `POST /foo` request to stop a previously cached `GET /foo` from being returned.) + +##### `requestDeduplicationPolicyFor` + +By default, `RESTDataSource` de-duplicates all **concurrent** outgoing **GET requests** in an in-memory cache, separate from the `KeyValueCache` used for the HTTP response cache. It makes the assumption that two HTTP GET requests to the same URL made in parallel can share the same response. When the GET request returns, its response is delivered to each caller that requested the same URL concurrently, and then it is removed from the cache. + +If a request is made with the same cache key (URL by default) but with an HTTP method other than GET, deduplication of the in-flight request is invalidated: the next parallel `GET` request for the same URL will make a new request. + +You can configure this behavior in several ways: +- You can change which requests are de-deduplicated and which are not. +- You can tell `RESTDataSource` to de-duplicate a request against new requests that start after it completes, not just overlapping requests. (This was the poorly-documented behavior of `RESTDataSource` prior to v5.0.0.) +- You can control the "deduplication key" independently from the `KeyValueCache` cache key. + +You do this by overriding the `requestDeduplicationPolicyFor` method in your class. This method takes an URL and a request, and returns a policy object with one of three forms: + +- `{policy: 'deduplicate-during-request-lifetime', deduplicationKey: string}`: This is the default behavior for GET requests. If a request with the same deduplication key is in progress, share its result. Otherwise, start a request, allow other requests to de-duplicate against it while it is running, and forget about it once the request returns successfully. +- `{policy: 'deduplicate-until-invalidated', deduplicationKey: string}`: This was the default behavior for GET requests in versions prior to v5. If a request with the same deduplication key is in progress, share its result. Otherwise, start a request and allow other requests to de-duplicate against it while it is running. All future requests with policy `deduplicate-during-request-lifetime` or `deduplicate-until-invalidated` with the same `deduplicationKey` will share the same result until a request is started with policy `do-not-deduplicate` and a matching entry in `invalidateDeduplicationKeys`. +- `{ policy: 'do-not-deduplicate'; invalidateDeduplicationKeys?: string[] }`: This is the default behavior for non-GET requests. Always run an actual HTTP request and don't allow other requests to de-duplicate against it. Additionally, invalidate any listed keys immediately: new requests with that `deduplicationKey` will not match any requests that currently exist in the request cache. + +The default implementation of this method is: + +```ts +protected requestDeduplicationPolicyFor( + url: URL, + request: RequestOptions, +): RequestDeduplicationPolicy { + // Start with the cache key that is used for the shared header-sensitive + // cache. Note that its default implementation does not include the HTTP + // method, so if a subclass overrides this and allows non-GETs to be + // de-duplicated it will be important for it to include (at least!) the + // method in the deduplication key, so we're explicitly adding GET here. + const cacheKey = this.cacheKeyFor(url, request); + if (request.method === 'GET') { + return { + policy: 'deduplicate-during-request-lifetime', + deduplicationKey: `${request.method} ${cacheKey}`, + }; + } else { + return { + policy: 'do-not-deduplicate', + // Always invalidate GETs when a different method is seen on the same + // cache key (ie, URL), as per standard HTTP semantics. (We don't have + // to invalidate the key with this HTTP method because we never write + // it.) + invalidateDeduplicationKeys: [`GET ${cacheKey}`], + }; } -} ``` -#### Methods - -##### `cacheKeyFor` -By default, `RESTDatasource` uses the full request URL as the cache key. Override this method to remove query parameters or compute a custom cache key. +To fully disable de-duplication, just always return `do-not-duplicate`. (This does not affect the HTTP header-sensitive cache.) -For example, you could use this to use header fields as part of the cache key. Even though we do validate header fields and don't serve responses from cache when they don't match, new responses overwrite old ones with different header fields. +```ts +class MoviesAPI extends RESTDataSource { + protected override requestDeduplicationPolicyFor() { + return { policy: 'do-not-deduplicate' } as const; + } +} +``` ##### `willSendRequest` This method is invoked at the beginning of processing each request. It's called @@ -136,10 +168,7 @@ The `get` method on the [`RESTDataSource`](https://github.com/apollographql/data ```javascript class MoviesAPI extends RESTDataSource { - constructor() { - super(); - this.baseURL = 'https://movies-api.example.com/'; - } + override baseURL = 'https://movies-api.example.com/'; // an example making an HTTP POST request async postMovie(movie) { @@ -207,8 +236,10 @@ import { RESTDataSource, AugmentedRequest } from '@apollo/datasource-rest'; class PersonalizationAPI extends RESTDataSource { override baseURL = 'https://personalization-api.example.com/'; - constructor(private token: string) { - super(); + private token: string; + constructor(options: { cache: KeyValueCache; token: string}) { + super(options); + this.token = options.token; } override willSendRequest(_path: string, request: AugmentedRequest) { @@ -222,11 +253,9 @@ class PersonalizationAPI extends RESTDataSource { In some cases, you'll want to set the URL based on the environment or other contextual values. To do this, you can override `resolveURL`: ```ts -class PersonalizationAPI extends RESTDataSource { - constructor(private token: string) { - super(); - } +import type { KeyValueCache } from '@apollo/utils.keyvaluecache'; +class PersonalizationAPI extends RESTDataSource { override async resolveURL(path: string, _request: AugmentedRequest) { if (!this.baseURL) { const addresses = await resolveSrv(path.split("/")[1] + ".service.consul"); diff --git a/src/RESTDataSource.ts b/src/RESTDataSource.ts index 8580847..bba73d6 100644 --- a/src/RESTDataSource.ts +++ b/src/RESTDataSource.ts @@ -67,11 +67,39 @@ export interface DataSourceConfig { fetch?: Fetcher; } +// RESTDataSource has two layers of caching. The first layer is purely in-memory +// within a single RESTDataSource object and is called "request deduplication". +// It is primarily designed so that multiple identical GET requests started +// concurrently can share one real HTTP GET; it does not observe HTTP response +// headers. (The second layer uses a potentially shared KeyValueCache for +// storage and does observe HTTP response headers.) To configure request +// deduplication, override requestDeduplicationPolicyFor. +export type RequestDeduplicationPolicy = + // If a request with the same deduplication key is in progress, share its + // result. Otherwise, start a request, allow other requests to de-duplicate + // against it while it is running, and forget about it once the request returns + // successfully. + | { policy: 'deduplicate-during-request-lifetime'; deduplicationKey: string } + // If a request with the same deduplication key is in progress, share its + // result. Otherwise, start a request and allow other requests to de-duplicate + // against it while it is running. All future requests with policy + // `deduplicate-during-request-lifetime` or `deduplicate-until-invalidated` + // with the same `deduplicationKey` will share the same result until a request + // is started with policy `do-not-deduplicate` and a matching entry in + // `invalidateDeduplicationKeys`. + | { policy: 'deduplicate-until-invalidated'; deduplicationKey: string } + // Always run an actual HTTP request and don't allow other requests to + // de-duplicate against it. Additionally, invalidate any listed keys + // immediately: new requests with that deduplicationKey will not match any + // requests that current exist. (The invalidation feature is used so that + // doing (say) `DELETE /path` invalidates any result for `GET /path` within + // the deduplication store.) + | { policy: 'do-not-deduplicate'; invalidateDeduplicationKeys?: string[] }; + export abstract class RESTDataSource { httpCache: HTTPCache; - memoizedResults = new Map>(); + protected deduplicationPromises = new Map>(); baseURL?: string; - memoizeGetRequests: boolean = true; constructor(config?: DataSourceConfig) { this.httpCache = new HTTPCache(config?.cache, config?.fetch); @@ -86,6 +114,49 @@ export abstract class RESTDataSource { return url.toString(); } + /** + * Calculates the deduplication policy for the request. + * + * By default, GET requests have the policy + * `deduplicate-during-request-lifetime` with deduplication key `GET + * ${cacheKey}`, and all other requests have the policy `do-not-deduplicate` + * and invalidate `GET ${cacheKey}`, where `cacheKey` is the value returned by + * `cacheKeyFor` (and is the same cache key used in the HTTP-header-sensitive + * shared cache). + * + * Note that the default cache key only contains the URL (not the method, + * headers, body, etc), so if you send multiple GET requests that differ only + * in headers (etc), or if you change your policy to allow non-GET requests to + * be deduplicated, you may want to put more information into the cache key or + * be careful to keep the HTTP method in the deduplication key. + */ + protected requestDeduplicationPolicyFor( + url: URL, + request: RequestOptions, + ): RequestDeduplicationPolicy { + // Start with the cache key that is used for the shared header-sensitive + // cache. Note that its default implementation does not include the HTTP + // method, so if a subclass overrides this and allows non-GETs to be + // de-duplicated it will be important for it to include (at least!) the + // method in the deduplication key, so we're explicitly adding GET here. + const cacheKey = this.cacheKeyFor(url, request); + if (request.method === 'GET') { + return { + policy: 'deduplicate-during-request-lifetime', + deduplicationKey: `${request.method} ${cacheKey}`, + }; + } else { + return { + policy: 'do-not-deduplicate', + // Always invalidate GETs when a different method is seen on the same + // cache key (ie, URL), as per standard HTTP semantics. (We don't have + // to invalidate the key with this HTTP method because we never write + // it.) + invalidateDeduplicationKeys: [`GET ${cacheKey}`], + }; + } + } + protected willSendRequest?( path: string, requestOpts: AugmentedRequest, @@ -260,10 +331,9 @@ export abstract class RESTDataSource { // (not possibly an `object`). const outgoingRequest = augmentedRequest as RequestOptions; - const cacheKey = this.cacheKeyFor(url, outgoingRequest); - const performRequest = async () => { return this.trace(url, outgoingRequest, async () => { + const cacheKey = this.cacheKeyFor(url, outgoingRequest); const cacheOptions = outgoingRequest.cacheOptions ? outgoingRequest.cacheOptions : this.cacheOptionsFor?.bind(this); @@ -281,19 +351,37 @@ export abstract class RESTDataSource { // Cache GET requests based on the calculated cache key // Disabling the request cache does not disable the response cache - if (this.memoizeGetRequests) { - if (outgoingRequest.method === 'GET') { - let promise = this.memoizedResults.get(cacheKey); - if (promise) return promise; - - promise = performRequest(); - this.memoizedResults.set(cacheKey, promise); - return promise; - } else { - this.memoizedResults.delete(cacheKey); - return performRequest(); + const policy = this.requestDeduplicationPolicyFor(url, outgoingRequest); + if ( + policy.policy === 'deduplicate-during-request-lifetime' || + policy.policy === 'deduplicate-until-invalidated' + ) { + const previousRequestPromise = this.deduplicationPromises.get( + policy.deduplicationKey, + ); + if (previousRequestPromise) return previousRequestPromise; + + const thisRequestPromise = performRequest(); + this.deduplicationPromises.set( + policy.deduplicationKey, + thisRequestPromise, + ); + try { + // The request promise needs to be awaited here rather than just + // returned. This ensures that the request completes before it's removed + // from the cache. Additionally, the use of finally here guarantees the + // deduplication cache is cleared in the event of an error during the + // request. + return await thisRequestPromise; + } finally { + if (policy.policy === 'deduplicate-during-request-lifetime') { + this.deduplicationPromises.delete(policy.deduplicationKey); + } } } else { + for (const key of policy.invalidateDeduplicationKeys ?? []) { + this.deduplicationPromises.delete(key); + } return performRequest(); } } diff --git a/src/__tests__/RESTDataSource.test.ts b/src/__tests__/RESTDataSource.test.ts index e12b65b..1697cff 100644 --- a/src/__tests__/RESTDataSource.test.ts +++ b/src/__tests__/RESTDataSource.test.ts @@ -3,6 +3,7 @@ import { AuthenticationError, CacheOptions, DataSourceConfig, + RequestDeduplicationPolicy, ForbiddenError, RequestOptions, RESTDataSource, @@ -584,8 +585,8 @@ describe('RESTDataSource', () => { }); }); - describe('memoization/request cache', () => { - it('de-duplicates requests with the same cache key', async () => { + describe('deduplication', () => { + it('de-duplicates simultaneous requests with the same cache key', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; @@ -599,6 +600,47 @@ describe('RESTDataSource', () => { await Promise.all([dataSource.getFoo(1), dataSource.getFoo(1)]); }); + it('does not de-duplicate sequential requests with the same cache key', async () => { + const dataSource = new (class extends RESTDataSource { + override baseURL = 'https://api.example.com'; + + getFoo(id: number) { + return this.get(`foo/${id}`); + } + })(); + + nock(apiUrl).get('/foo/1').reply(200); + nock(apiUrl).get('/foo/1').reply(200); + await dataSource.getFoo(1); + await dataSource.getFoo(1); + }); + + it('de-duplicates sequential requests with the same cache key with policy deduplicate-until-invalidated', async () => { + const dataSource = new (class extends RESTDataSource { + override baseURL = 'https://api.example.com'; + protected override requestDeduplicationPolicyFor( + url: URL, + request: RequestOptions, + ): RequestDeduplicationPolicy { + const p = super.requestDeduplicationPolicyFor(url, request); + return p.policy === 'deduplicate-during-request-lifetime' + ? { + policy: 'deduplicate-until-invalidated', + deduplicationKey: p.deduplicationKey, + } + : p; + } + + getFoo(id: number) { + return this.get(`foo/${id}`); + } + })(); + + nock(apiUrl).get('/foo/1').reply(200); + await dataSource.getFoo(1); + await dataSource.getFoo(1); + }); + it('does not deduplicate requests with a different cache key', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; @@ -614,7 +656,7 @@ describe('RESTDataSource', () => { await Promise.all([dataSource.getFoo(1), dataSource.getFoo(2)]); }); - it('does not deduplicate non-GET requests', async () => { + it('does not deduplicate non-GET requests by default', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; @@ -629,7 +671,7 @@ describe('RESTDataSource', () => { await Promise.all([dataSource.postFoo(1), dataSource.postFoo(1)]); }); - it('non-GET request removes memoized request with the same cache key', async () => { + it('non-GET request invalidates deduplication of request with the same cache key', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; @@ -653,6 +695,40 @@ describe('RESTDataSource', () => { ]); }); + it('non-GET request invalidates deduplication of request with the same cache key with deduplicate-until-invalidated', async () => { + const dataSource = new (class extends RESTDataSource { + override baseURL = 'https://api.example.com'; + protected override requestDeduplicationPolicyFor( + url: URL, + request: RequestOptions, + ): RequestDeduplicationPolicy { + const p = super.requestDeduplicationPolicyFor(url, request); + return p.policy === 'deduplicate-during-request-lifetime' + ? { + policy: 'deduplicate-until-invalidated', + deduplicationKey: p.deduplicationKey, + } + : p; + } + + getFoo(id: number) { + return this.get(`foo/${id}`); + } + + postFoo(id: number) { + return this.post(`foo/${id}`); + } + })(); + + nock(apiUrl).get('/foo/1').reply(200); + nock(apiUrl).post('/foo/1').reply(200); + nock(apiUrl).get('/foo/1').reply(200); + + await dataSource.getFoo(1); + await dataSource.postFoo(1); + await dataSource.getFoo(1); + }); + it('allows specifying a custom cache key', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; @@ -678,10 +754,12 @@ describe('RESTDataSource', () => { ]); }); - it('allows disabling the GET cache', async () => { + it('allows disabling deduplication', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; - override memoizeGetRequests = false; + protected override requestDeduplicationPolicyFor() { + return { policy: 'do-not-deduplicate' } as const; + } getFoo(id: number) { return this.get(`foo/${id}`); @@ -833,10 +911,56 @@ describe('RESTDataSource', () => { }); describe('http cache', () => { + // Skipping due to https://github.com/apollographql/datasource-rest/issues/102 + it.skip('caches 301 responses', async () => { + const dataSource = new (class extends RESTDataSource { + override baseURL = 'https://api.example.com'; + getFoo(id: number) { + return this.get(`foo/${id}`); + } + })(); + + nock(apiUrl).get('/foo/1').reply(301, '', { + location: 'https://api.example.com/foo/2', + 'cache-control': 'public, max-age=31536000, immutable', + }); + nock(apiUrl).get('/foo/2').reply(200); + await dataSource.getFoo(1); + + // Call a second time which should be cached + await dataSource.getFoo(1); + }); + + it('does not cache 302 responses', async () => { + const dataSource = new (class extends RESTDataSource { + override baseURL = 'https://api.example.com'; + getFoo(id: number) { + return this.get(`foo/${id}`); + } + })(); + + nock(apiUrl).get('/foo/1').reply(302, '', { + location: 'https://api.example.com/foo/2', + 'cache-control': 'public, max-age=31536000, immutable', + }); + nock(apiUrl).get('/foo/2').reply(200); + await dataSource.getFoo(1); + + // Call a second time which should NOT be cached (it's a temporary redirect!). + nock(apiUrl).get('/foo/1').reply(302, '', { + location: 'https://api.example.com/foo/2', + 'cache-control': 'public, max-age=31536000, immutable', + }); + nock(apiUrl).get('/foo/2').reply(200); + await dataSource.getFoo(1); + }); + it('allows setting cache options for each request', async () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; - override memoizeGetRequests = false; + protected override requestDeduplicationPolicyFor() { + return { policy: 'do-not-deduplicate' } as const; + } getFoo(id: number) { return this.get(`foo/${id}`); @@ -863,7 +987,9 @@ describe('RESTDataSource', () => { const dataSource = new (class extends RESTDataSource { override baseURL = 'https://api.example.com'; - override memoizeGetRequests = false; + protected override requestDeduplicationPolicyFor() { + return { policy: 'do-not-deduplicate' } as const; + } getFoo(id: number) { return this.get(`foo/${id}`);