Skip to content

Commit

Permalink
fix(perf): pull hashes without refreshing metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenplusplus committed Mar 10, 2021
1 parent 58ccc7a commit 9f35847
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 86 deletions.
57 changes: 35 additions & 22 deletions src/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,8 @@ class File extends ServiceObject<File> {
crc32c = false;
}

const shouldRunValidation = !rangeRequest && (crc32c || md5);

if (rangeRequest) {
if (
typeof options.validation === 'string' ||
Expand Down Expand Up @@ -1268,6 +1270,8 @@ class File extends ServiceObject<File> {
qs: query,
};

const hashes: {crc32c?: string; md5?: string} = {};

this.requestStream(reqOpts)
.on('error', err => {
throughStream.destroy(err);
Expand Down Expand Up @@ -1299,18 +1303,30 @@ class File extends ServiceObject<File> {
err.message = body;
throughStream.destroy(err);
});

return;
}

rawResponseStream.on('error', onComplete);

const headers = rawResponseStream.toJSON().headers;
isServedCompressed = headers['content-encoding'] === 'gzip';
const shouldRunValidation = !rangeRequest && (crc32c || md5);

const throughStreams: Writable[] = [];

if (shouldRunValidation) {
// The x-goog-hash header should be set with a crc32c and md5 hash.
// ex: headers['x-goog-hash'] = 'crc32c=xxxx,md5=xxxx'
if (typeof headers['x-goog-hash'] === 'string') {
headers['x-goog-hash']
.split(',')
.forEach((hashKeyValPair: string) => {
const delimiterIndex = hashKeyValPair.indexOf('=');
const hashType = hashKeyValPair.substr(0, delimiterIndex);
const hashValue = hashKeyValPair.substr(delimiterIndex + 1);
hashes[hashType as 'crc32c' | 'md5'] = hashValue;
});
}

validateStream = hashStreamValidation({crc32c, md5});
throughStreams.push(validateStream);
}
Expand Down Expand Up @@ -1345,16 +1361,28 @@ class File extends ServiceObject<File> {
return;
}

if (rangeRequest) {
if (rangeRequest || !shouldRunValidation) {
onCompleteCalled = true;
throughStream.end();
return;
}

if (!refreshedMetadata) {
refreshedMetadata = true;
this.getMetadata({userProject: options.userProject}, onComplete);
return;
// TODO(https://github.com/googleapis/nodejs-storage/issues/709):
// Remove once the backend issue is fixed.
// If object is stored compressed (having
// metadata.contentEncoding === 'gzip') and was served decompressed,
// then skip checksum validation because the remote checksum is computed
// against the compressed version of the object.
if (!isServedCompressed) {
if (!refreshedMetadata) {
refreshedMetadata = true;
this.getMetadata({userProject: options.userProject}, onComplete);
return;
}
if (this.metadata.contentEncoding === 'gzip') {
throughStream.end();
return;
}
}

if (onCompleteCalled) {
Expand All @@ -1363,21 +1391,6 @@ class File extends ServiceObject<File> {

onCompleteCalled = true;

// TODO(https://github.com/googleapis/nodejs-storage/issues/709):
// Remove once the backend issue is fixed.
// If object is stored compressed (having metadata.contentEncoding === 'gzip')
// and was served decompressed, then skip checksum validation because the
// remote checksum is computed against the compressed version of the object.
if (this.metadata.contentEncoding === 'gzip' && !isServedCompressed) {
throughStream.end();
return;
}

const hashes = {
crc32c: this.metadata.crc32c,
md5: this.metadata.md5Hash,
};

// If we're doing validation, assume the worst-- a data integrity
// mismatch. If not, these tests won't be performed, and we can assume
// the best.
Expand Down
130 changes: 66 additions & 64 deletions test/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,9 @@ describe('File', () => {
});

describe('createReadStream', () => {
const CRC32C_HASH = 'crc32c-hash';
const MD5_HASH = 'md5-hash';

function getFakeRequest(data?: {}) {
let requestOptions: DecorateRequestOptions | undefined;

Expand Down Expand Up @@ -1130,10 +1133,6 @@ describe('File', () => {
return requestStream;
};

file.getMetadata = (options: object, callback: Function) => {
callback();
};

file
.createReadStream({validation: false})
.on('error', (err: Error) => {
Expand All @@ -1154,6 +1153,10 @@ describe('File', () => {
const GZIPPED_DATA = zlib.gzipSync(DATA);

beforeEach(() => {
hashStreamValidationOverride = () =>
Object.assign(new PassThrough(), {
test: () => true,
});
handleRespOverride = (
err: Error,
res: {},
Expand All @@ -1166,6 +1169,7 @@ describe('File', () => {
return {
headers: {
'content-encoding': 'gzip',
'x-goog-hash': `crc32c=${CRC32C_HASH},md5=${MD5_HASH}`,
},
};
},
Expand Down Expand Up @@ -1227,9 +1231,6 @@ describe('File', () => {
});
return createGunzipStream;
};
file.getMetadata = (options: object, callback: Function) => {
callback();
};
file
.createReadStream({validation: false})
.on('error', (err: Error) => {
Expand All @@ -1249,61 +1250,44 @@ describe('File', () => {
let fakeValidationStream: Stream & {test: Function};

beforeEach(() => {
file.metadata.mediaLink = 'http://uri';

file.getMetadata = (options: {}, callback: Function) => {
file.metadata = {
crc32c: '####wA==',
md5Hash: 'CY9rzUYh03PK3k6DJie09g==',
};
callback();
};

file.getMetadata = (opts: {}, callback: Function) => callback();
fakeValidationStream = Object.assign(new PassThrough(), {
test: () => true,
});
hashStreamValidationOverride = () => {
return fakeValidationStream;
};
handleRespOverride = (
err: Error,
res: {},
body: {},
callback: Function
) => {
const rawResponseStream = new PassThrough();
Object.assign(rawResponseStream, {
toJSON() {
return {
headers: {
'x-goog-hash': `crc32c=${CRC32C_HASH},md5=${MD5_HASH}`,
},
};
},
});
callback(null, null, rawResponseStream);
setImmediate(() => {
rawResponseStream.end(data);
});
};
file.requestStream = getFakeSuccessfulRequest(data);
});

describe('server decompression', () => {
beforeEach(() => {
handleRespOverride = (
err: Error,
res: {},
body: {},
callback: Function
) => {
const rawResponseStream = new PassThrough();
Object.assign(rawResponseStream, {
toJSON() {
return {
headers: {},
};
},
});
callback(null, null, rawResponseStream);
setImmediate(() => {
rawResponseStream.end(data);
});
};
file.requestStream = getFakeSuccessfulRequest(data);
});

it('should skip validation if file was stored compressed', done => {
file.metadata.contentEncoding = 'gzip';

const validateStub = sinon.stub().returns(true);
fakeValidationStream.test = validateStub;

file.getMetadata = (options: {}, callback: Function) => {
file.metadata = {
crc32c: '####wA==',
md5Hash: 'CY9rzUYh03PK3k6DJie09g==',
contentEncoding: 'gzip',
};
callback();
};

file
.createReadStream({validation: 'crc32c'})
.on('error', done)
Expand Down Expand Up @@ -1371,7 +1355,7 @@ describe('File', () => {
done();
};

file.requestStream = getFakeSuccessfulRequest(data);
file.requestStream = getFakeSuccessfulRequest('data');

file.createReadStream(fakeOptions).on('error', done).resume();
});
Expand All @@ -1395,7 +1379,11 @@ describe('File', () => {

it('should validate with crc32c', done => {
file.requestStream = getFakeSuccessfulRequest(data);

// eslint-disable-next-line @typescript-eslint/no-explicit-any
(fakeValidationStream as any).test = (algo: string) => {
assert.strictEqual(algo, 'crc32c');
return true;
};
file
.createReadStream({validation: 'crc32c'})
.on('error', done)
Expand Down Expand Up @@ -1430,7 +1418,10 @@ describe('File', () => {
it('should emit an error if md5 validation fails', done => {
file.requestStream = getFakeSuccessfulRequest('bad-data');
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(fakeValidationStream as any).test = () => false;
(fakeValidationStream as any).test = (algo: string) => {
assert.strictEqual(algo, 'md5');
return false;
};
file
.createReadStream({validation: 'md5'})
.on('error', (err: ApiError) => {
Expand All @@ -1441,13 +1432,12 @@ describe('File', () => {
});

it('should default to crc32c validation', done => {
file.getMetadata = (options: {}, callback: Function) => {
file.metadata = {
crc32c: file.metadata.crc32c,
};
callback();
file.requestStream = getFakeSuccessfulRequest('bad-data');
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(fakeValidationStream as any).test = (algo: string) => {
assert.strictEqual(algo, 'crc32c');
return false;
};
file.requestStream = getFakeSuccessfulRequest(data);
file
.createReadStream()
.on('error', (err: ApiError) => {
Expand Down Expand Up @@ -1486,13 +1476,25 @@ describe('File', () => {
});

it('should destroy if MD5 is requested but absent', done => {
file.getMetadata = (options: {}, callback: Function) => {
file.metadata = {
crc32c: file.metadata.crc32c,
};
callback();
handleRespOverride = (
err: Error,
res: {},
body: {},
callback: Function
) => {
const rawResponseStream = new PassThrough();
Object.assign(rawResponseStream, {
toJSON() {
return {
headers: {},
};
},
});
callback(null, null, rawResponseStream);
setImmediate(() => {
rawResponseStream.end();
});
};

file.requestStream = getFakeSuccessfulRequest('bad-data');

const readStream = file.createReadStream({validation: 'md5'});
Expand Down

0 comments on commit 9f35847

Please sign in to comment.