From 5ab6b88e3ba5765cdf7689d8d2238f9267ff355a Mon Sep 17 00:00:00 2001 From: Yigal Date: Thu, 18 Jul 2024 11:36:41 +0300 Subject: [PATCH] feat(brotli): support brotli compression format --- package.json | 8 +-- src/node/decompress.js | 15 ++++++ src/node/index.js | 55 ++++++++++++--------- src/node/unzip.js | 17 ++++--- src/utils.js | 32 ++++++++++++ test/node/inflate.js | 84 ++++++++++++++++++++++++++++++++ test/node/utils.js | 108 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 284 insertions(+), 35 deletions(-) create mode 100644 src/node/decompress.js diff --git a/package.json b/package.json index a371d7bf8..decdd673a 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,8 @@ "browser": { "./src/node/index.js": "./src/client.js", "./lib/node/index.js": "./lib/client.js", - "./test/support/server.js": "./test/support/blank.js" + "./test/support/server.js": "./test/support/blank.js", + "re2": false }, "bugs": { "url": "https://github.com/ladjs/superagent/issues" @@ -26,7 +27,8 @@ "formidable": "^3.5.1", "methods": "^1.1.2", "mime": "2.6.0", - "qs": "^6.11.0" + "qs": "^6.11.0", + "re2": "^1.21.3" }, "devDependencies": { "@babel/cli": "^7.20.7", @@ -62,7 +64,7 @@ "rimraf": "3", "should": "^13.2.3", "should-http": "^0.1.1", - "tinyify": "3.0.0", + "tinyify": "4.0.0", "xo": "^0.53.1", "zuul": "^3.12.0" }, diff --git a/src/node/decompress.js b/src/node/decompress.js new file mode 100644 index 000000000..81de84412 --- /dev/null +++ b/src/node/decompress.js @@ -0,0 +1,15 @@ +const zlib = require('zlib'); +const utils = require('../utils'); +const { isGzipOrDeflateEncoding, isBrotliEncoding } = utils; + +exports.chooseDecompresser = (res) => { + let decompresser; + if (isGzipOrDeflateEncoding(res)) { + decompresser = zlib.createUnzip(); + } else if (isBrotliEncoding(res)) { + decompresser = zlib.createBrotliDecompress(); + } else { + throw new Error('unknown content-encoding'); + } + return decompresser; +} diff --git a/src/node/index.js b/src/node/index.js index 75217c8bd..74b357005 100644 --- a/src/node/index.js +++ b/src/node/index.js @@ -21,10 +21,11 @@ const safeStringify = require('fast-safe-stringify'); const utils = require('../utils'); const RequestBase = require('../request-base'); const http2 = require('./http2wrapper'); -const { unzip } = require('./unzip'); +const { decompress } = require('./unzip'); const Response = require('./response'); -const { mixin, hasOwn } = utils; +const { mixin, hasOwn, isBrotliEncoding, isGzipOrDeflateEncoding } = utils; +const { chooseDecompresser } = require('./decompress'); function request(method, url) { // callback @@ -446,9 +447,11 @@ Request.prototype._pipeContinue = function (stream, options) { this._emitResponse(); if (this._aborted) return; - if (this._shouldUnzip(res)) { - const unzipObject = zlib.createUnzip(); - unzipObject.on('error', (error) => { + if (this._shouldDecompress(res)) { + + let decompresser = chooseDecompresser(res); + + decompresser.on('error', (error) => { if (error && error.code === 'Z_BUF_ERROR') { // unexpected end of file is ignored by browsers and curl stream.emit('end'); @@ -457,9 +460,9 @@ Request.prototype._pipeContinue = function (stream, options) { stream.emit('error', error); }); - res.pipe(unzipObject).pipe(stream, options); - // don't emit 'end' until unzipObject has completed writing all its data. - unzipObject.once('end', () => this.emit('end')); + res.pipe(decompresser).pipe(stream, options); + // don't emit 'end' until decompresser has completed writing all its data. + decompresser.once('end', () => this.emit('end')); } else { res.pipe(stream, options); res.once('end', () => this.emit('end')); @@ -1045,8 +1048,8 @@ Request.prototype._end = function () { } // zlib support - if (this._shouldUnzip(res)) { - unzip(req, res); + if (this._shouldDecompress(res)) { + decompress(req, res); } let buffer = this._buffer; @@ -1275,22 +1278,11 @@ Request.prototype._end = function () { }; // Check whether response has a non-0-sized gzip-encoded body -Request.prototype._shouldUnzip = (res) => { - if (res.statusCode === 204 || res.statusCode === 304) { - // These aren't supposed to have any body - return false; - } - - // header content is a string, and distinction between 0 and no information is crucial - if (res.headers['content-length'] === '0') { - // We know that the body is empty (unfortunately, this check does not cover chunked encoding) - return false; - } - - // console.log(res); - return /^\s*(?:deflate|gzip)\s*$/.test(res.headers['content-encoding']); +Request.prototype._shouldDecompress = (res) => { + return hasNonEmptyResponseContent(res) && (isGzipOrDeflateEncoding(res) || isBrotliEncoding(res)); }; + /** * Overrides DNS for selected hostnames. Takes object mapping hostnames to IP addresses. * @@ -1411,3 +1403,18 @@ function isJSON(mime) { function isRedirect(code) { return [301, 302, 303, 305, 307, 308].includes(code); } + +function hasNonEmptyResponseContent(res) { + if (res.statusCode === 204 || res.statusCode === 304) { + // These aren't supposed to have any body + return false; + } + + // header content is a string, and distinction between 0 and no information is crucial + if (res.headers['content-length'] === '0') { + // We know that the body is empty (unfortunately, this check does not cover chunked encoding) + return false; + } + + return true; +} diff --git a/src/node/unzip.js b/src/node/unzip.js index 46182f4da..1eeff9802 100644 --- a/src/node/unzip.js +++ b/src/node/unzip.js @@ -4,25 +4,26 @@ const { StringDecoder } = require('string_decoder'); const Stream = require('stream'); -const zlib = require('zlib'); +const { chooseDecompresser } = require('./decompress'); /** - * Buffers response data events and re-emits when they're unzipped. + * Buffers response data events and re-emits when they're decompressed. * * @param {Request} req * @param {Response} res * @api private */ -exports.unzip = (request, res) => { - const unzip = zlib.createUnzip(); +exports.decompress = (request, res) => { + let decompresser = chooseDecompresser(res); + const stream = new Stream(); let decoder; // make node responseOnEnd() happy stream.req = request; - unzip.on('error', (error) => { + decompresser.on('error', (error) => { if (error && error.code === 'Z_BUF_ERROR') { // unexpected end of file is ignored by browsers and curl stream.emit('end'); @@ -33,7 +34,7 @@ exports.unzip = (request, res) => { }); // pipe to unzip - res.pipe(unzip); + res.pipe(decompresser); // override `setEncoding` to capture encoding res.setEncoding = (type) => { @@ -41,7 +42,7 @@ exports.unzip = (request, res) => { }; // decode upon decompressing with captured encoding - unzip.on('data', (buf) => { + decompresser.on('data', (buf) => { if (decoder) { const string_ = decoder.write(buf); if (string_.length > 0) stream.emit('data', string_); @@ -50,7 +51,7 @@ exports.unzip = (request, res) => { } }); - unzip.on('end', () => { + decompresser.on('end', () => { stream.emit('end'); }); diff --git a/src/utils.js b/src/utils.js index bd8333f90..fc3514cc4 100644 --- a/src/utils.js +++ b/src/utils.js @@ -1,3 +1,15 @@ +let RE2; +let hasRE2 = true; + +try { + RE2 = require('re2'); +} catch { + hasRE2 = false; +} + +const SafeRegExp = hasRE2 ? RE2 : RegExp; + + /** * Return the mime type for the given `str`. * @@ -105,3 +117,23 @@ exports.mixin = (target, source) => { } } }; + +/** + * Check if the response is compressed using Gzip or Deflate. + * @param {Object} res + * @return {Boolean} + */ + +exports.isGzipOrDeflateEncoding = (res) => { + return new SafeRegExp(/^\s*(?:deflate|gzip)\s*$/).test(res.headers['content-encoding']); +}; + +/** + * Check if the response is compressed using Brotli. + * @param {Object} res + * @return {Boolean} + */ + +exports.isBrotliEncoding = (res) => { + return new SafeRegExp(/^\s*(?:br)\s*$/).test(res.headers['content-encoding']); +}; diff --git a/test/node/inflate.js b/test/node/inflate.js index 1a849e440..7b48a1e01 100644 --- a/test/node/inflate.js +++ b/test/node/inflate.js @@ -36,11 +36,24 @@ app.get('/binary', (request_, res) => { res.send(buf); }); }); + +app.get('/binary-brotli', (request_, res) => { + zlib.brotliCompress(subject, (error, buf) => { + res.set('Content-Encoding', 'br'); + res.send(buf); + }); +}); + app.get('/corrupt', (request_, res) => { res.set('Content-Encoding', 'gzip'); res.send('blah'); }); +app.get('/corrupt-brotli', (request_, res) => { + res.set('Content-Encoding', 'br'); + res.send('blah'); +}); + app.get('/nocontent', (request_, res, next) => { res.statusCode = 204; res.set('Content-Type', 'text/plain'); @@ -48,6 +61,13 @@ app.get('/nocontent', (request_, res, next) => { res.send(''); }); +app.get('/nocontent-brotli', (request_, res, next) => { + res.statusCode = 204; + res.set('Content-Type', 'text/plain'); + res.set('Content-Encoding', 'br'); + res.send(''); +}); + app.get('/', (request_, res, next) => { zlib.deflate(subject, (error, buf) => { res.set('Content-Type', 'text/plain'); @@ -65,6 +85,15 @@ app.get('/junk', (request_, res) => { }); }); +app.get('/junk-brotli', (request_, res) => { + zlib.brotliCompress(subject, (error, buf) => { + res.set('Content-Type', 'text/plain'); + res.set('Content-Encoding', 'br'); + res.write(buf); + res.end(' 0 junk'); + }); +}); + app.get('/chopped', (request_, res) => { zlib.deflate(`${subject}123456`, (error, buf) => { res.set('Content-Type', 'text/plain'); @@ -73,6 +102,14 @@ app.get('/chopped', (request_, res) => { }); }); +app.get('/chopped-brotli', (request_, res) => { + zlib.brotliCompress(`${subject}123456`, (error, buf) => { + res.set('Content-Type', 'text/plain'); + res.set('Content-Encoding', 'br'); + res.send(buf.slice(0, -1)); + }); +}); + describe('zlib', () => { it('should deflate the content', (done) => { request.get(base).end((error, res) => { @@ -106,6 +143,14 @@ describe('zlib', () => { }); }); + it('should ignore trailing junk-brotli', (done) => { + request.get(`${base}/junk-brotli`).end((error, res) => { + res.should.have.status(200); + res.text.should.equal(subject); + done(); + }); + }); + it('should ignore missing data', (done) => { request.get(`${base}/chopped`).end((error, res) => { assert.equal(undefined, error); @@ -115,6 +160,15 @@ describe('zlib', () => { }); }); + it('should ignore missing brotli data', (done) => { + request.get(`${base}/chopped-brotli`).end((error, res) => { + assert.equal(undefined, error); + res.should.have.status(200); + res.text.should.startWith(subject); + done(); + }); + }); + it('should handle corrupted responses', (done) => { request.get(`${base}/corrupt`).end((error, res) => { assert(error, 'missing error'); @@ -123,6 +177,13 @@ describe('zlib', () => { }); }); + it('should handle brotli corrupted responses', (done) => { + request.get(`${base}/corrupt-brotli`).end((error, res) => { + res.text.should.equal(''); + done(); + }); + }); + it('should handle no content with gzip header', (done) => { request.get(`${base}/nocontent`).end((error, res) => { assert.ifError(error); @@ -134,6 +195,17 @@ describe('zlib', () => { }); }); + it('should handle no content with gzip header', (done) => { + request.get(`${base}/nocontent-brotli`).end((error, res) => { + assert.ifError(error); + assert(res); + res.should.have.status(204); + res.text.should.equal(''); + res.headers.should.not.have.property('content-length'); + done(); + }); + }); + describe('without encoding set', () => { it('should buffer if asked', () => { return request @@ -147,6 +219,18 @@ describe('zlib', () => { }); }); + it('should buffer Brotli if asked', () => { + return request + .get(`${base}/binary-brotli`) + .buffer(true) + .then((res) => { + res.should.have.status(200); + assert(res.headers['content-length']); + assert(res.body.byteLength); + assert.equal(subject, res.body.toString()); + }); + }); + it('should emit buffers', (done) => { request.get(`${base}/binary`).end((error, res) => { res.should.have.status(200); diff --git a/test/node/utils.js b/test/node/utils.js index e0ba5e57d..50980bc18 100644 --- a/test/node/utils.js +++ b/test/node/utils.js @@ -39,3 +39,111 @@ describe('utils.parseLinks(str)', () => { ); }); }); + +describe('utils.isGzipOrDeflateEncoding(res)', () => { + it('should return true when content encoding is gzip', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': 'gzip', + }, + }).should.equal(true); + }); + it('should return true when content encoding is deflate', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': 'deflate', + }, + }).should.equal(true); + }); + it('should return false when content encoding is bla', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': 'bla', + }, + }).should.equal(false); + }); + + it('should return true when content encoding has a lot of spaces followed with gzip', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': " " * 10**6 + " gzip", + }, + }).should.equal(false); + }); + + it('should return true when content encoding repeates it self', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': "gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate", + }, + }).should.equal(false); + }); + + it('should return true when content encoding repeates it self wuth a lot of spaces', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': " gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate", + }, + }).should.equal(false); + }); + + it('should return true when content encoding - nested patterns', () => { + utils.isGzipOrDeflateEncoding({ + headers: { + 'content-encoding': " " * 10**5 + ("gzip deflate " * 1000) + }, + }).should.equal(false); + }); + + +}); + +describe('utils.isBrotliEncoding(res)', () => { + it('should return true when content encoding is br', () => { + utils.isBrotliEncoding({ + headers: { + 'content-encoding': 'br', + }, + }).should.equal(true); + }); + it('should return false when content encoding is bla', () => { + utils.isBrotliEncoding({ + headers: { + 'content-encoding': 'bla', + }, + }).should.equal(false); + }); + + it('should return true when content encoding has a lot of spaces followed with br', () => { + utils.isBrotliEncoding({ + headers: { + 'content-encoding': " " * 10**6 + " br", + }, + }).should.equal(false); + }); + + it('should return true when content encoding repeates it self', () => { + utils.isBrotliEncoding({ + headers: { + 'content-encoding': " br br br br br br br br br br br br br br br br br br br br br br br br br br br br br br", + }, + }).should.equal(false); + }); + + it('should return true when content encoding repeates it self wuth a lot of spaces', () => { + utils.isBrotliEncoding({ + headers: { + 'content-encoding': "br br br br br br br br br br br br br br", + }, + }).should.equal(false); + }); + + it('should return true when content encoding - nested patterns', () => { + utils.isBrotliEncoding({ + headers: { + 'content-encoding': " " * 10**5 + ("br " * 1000) + }, + }).should.equal(false); + }); + +});