Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(NODE-6246): Significantly improve memory usage and performance of ObjectId #703

Closed
227 changes: 137 additions & 90 deletions src/objectid.ts

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions src/parser/deserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,7 @@ function deserializeObject(
value = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_OID) {
const oid = ByteUtils.allocateUnsafe(12);
for (let i = 0; i < 12; i++) oid[i] = buffer[index + i];
value = new ObjectId(oid);
value = new ObjectId(buffer, index);
index = index + 12;
} else if (elementType === constants.BSON_DATA_INT && promoteValues === false) {
value = new Int32(NumberUtils.getInt32LE(buffer, index));
Expand Down Expand Up @@ -608,9 +606,7 @@ function deserializeObject(
index = index + stringSize;

// Read the oid
const oidBuffer = ByteUtils.allocateUnsafe(12);
for (let i = 0; i < 12; i++) oidBuffer[i] = buffer[index + i];
const oid = new ObjectId(oidBuffer);
const oid = new ObjectId(buffer, index);

// Update the index
index = index + 12;
Expand Down
15 changes: 7 additions & 8 deletions src/parser/serializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,9 @@ function serializeObjectId(buffer: Uint8Array, key: string, value: ObjectId, ind
// Write the type
buffer[index++] = constants.BSON_DATA_OID;
// Number of written bytes
const numberOfWrittenBytes = ByteUtils.encodeUTF8Into(buffer, key, index);
index += ByteUtils.encodeUTF8Into(buffer, key, index);
nbbeeken marked this conversation as resolved.
Show resolved Hide resolved

// Encode the name
index = index + numberOfWrittenBytes;
buffer[index++] = 0;

index += value.serializeInto(buffer, index);
Expand Down Expand Up @@ -647,6 +646,8 @@ export function serializeInto(
index = serializeNull(buffer, key, value, index);
} else if (value === null) {
index = serializeNull(buffer, key, value, index);
} else if (value._bsontype === 'ObjectId') {
index = serializeObjectId(buffer, key, value, index);
} else if (isUint8Array(value)) {
index = serializeBuffer(buffer, key, value, index);
} else if (value instanceof RegExp || isRegExp(value)) {
Expand All @@ -668,8 +669,6 @@ export function serializeInto(
value[Symbol.for('@@mdb.bson.version')] !== constants.BSON_MAJOR_VERSION
) {
throw new BSONVersionError();
} else if (value._bsontype === 'ObjectId') {
index = serializeObjectId(buffer, key, value, index);
} else if (value._bsontype === 'Decimal128') {
index = serializeDecimal128(buffer, key, value, index);
} else if (value._bsontype === 'Long' || value._bsontype === 'Timestamp') {
Expand Down Expand Up @@ -757,6 +756,8 @@ export function serializeInto(
index = serializeDate(buffer, key, value, index);
} else if (value === null || (value === undefined && ignoreUndefined === false)) {
index = serializeNull(buffer, key, value, index);
} else if (value._bsontype === 'ObjectId') {
index = serializeObjectId(buffer, key, value, index);
Comment on lines +759 to +760
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the motivation for moving this step up here? seems like it may have broken the version check that comes later for this BSONType, unless I'm mistaken?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did see a measurable performance gain by moving the ObjectId check to the top if in my testing, I believe since some of those checks like isUInt8Array() and Symbol.for("@@mdb.bson.version") are moderately expensive for such a hot path. Now that I'm thinking about this we can instantiate a single Symbol variable and just reference it instead of creating it every time, and maybe move the isUInt8Array() down lower if that doesn't break any tests.

Copy link
Contributor Author

@SeanReece SeanReece Jul 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update: I narrowed the performance impact to the isUint8Array(), isRegExp(), and isDate() calls, which all invoke Object.prototype.toString.call(value) which is expensive. Did some refactoring and was able to get serialize from 2.4m ops/sec -> 3m ops/sec, which in my testing is only about -8% decrease in raw serialization perf. AND all the tests are passing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A shared copy of the symbol sounds like a good call. As long as the version check is still performed, moving probably the most common BSON type into a position where it performs better sounds good to me.

} else if (isUint8Array(value)) {
index = serializeBuffer(buffer, key, value, index);
} else if (value instanceof RegExp || isRegExp(value)) {
Expand All @@ -778,8 +779,6 @@ export function serializeInto(
value[Symbol.for('@@mdb.bson.version')] !== constants.BSON_MAJOR_VERSION
) {
throw new BSONVersionError();
} else if (value._bsontype === 'ObjectId') {
index = serializeObjectId(buffer, key, value, index);
} else if (type === 'object' && value._bsontype === 'Decimal128') {
index = serializeDecimal128(buffer, key, value, index);
} else if (value._bsontype === 'Long' || value._bsontype === 'Timestamp') {
Expand Down Expand Up @@ -867,6 +866,8 @@ export function serializeInto(
if (ignoreUndefined === false) index = serializeNull(buffer, key, value, index);
} else if (value === null) {
index = serializeNull(buffer, key, value, index);
} else if (value._bsontype === 'ObjectId') {
index = serializeObjectId(buffer, key, value, index);
} else if (isUint8Array(value)) {
index = serializeBuffer(buffer, key, value, index);
} else if (value instanceof RegExp || isRegExp(value)) {
Expand All @@ -888,8 +889,6 @@ export function serializeInto(
value[Symbol.for('@@mdb.bson.version')] !== constants.BSON_MAJOR_VERSION
) {
throw new BSONVersionError();
} else if (value._bsontype === 'ObjectId') {
index = serializeObjectId(buffer, key, value, index);
} else if (type === 'object' && value._bsontype === 'Decimal128') {
index = serializeDecimal128(buffer, key, value, index);
} else if (value._bsontype === 'Long' || value._bsontype === 'Timestamp') {
Expand Down
2 changes: 1 addition & 1 deletion src/utils/byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export type ByteUtils = {
/** Create a Uint8Array from a hex string */
fromHex: (hex: string) => Uint8Array;
/** Create a lowercase hex string from bytes */
toHex: (buffer: Uint8Array) => string;
toHex: (buffer: Uint8Array, start?: number, end?: number) => string;
/** Create a string from utf8 code units, fatal=true will throw an error if UTF-8 bytes are invalid, fatal=false will insert replacement characters */
toUTF8: (buffer: Uint8Array, start: number, end: number, fatal: boolean) => string;
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
Expand Down
4 changes: 2 additions & 2 deletions src/utils/node_byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ export const nodeJsByteUtils = {
return Buffer.from(hex, 'hex');
},

toHex(buffer: Uint8Array): string {
return nodeJsByteUtils.toLocalBufferType(buffer).toString('hex');
toHex(buffer: NodeJsBuffer, start?: number, end?: number): string {
return nodeJsByteUtils.toLocalBufferType(buffer).toString('hex', start, end);
},

toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
Expand Down
14 changes: 14 additions & 0 deletions src/utils/string_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,17 @@ export function validateStringCharacters(str: string, radix?: number): false | s
const regex = new RegExp(`[^-+${validCharacters}]`, 'i');
return regex.test(str) ? false : str;
}

/**
* @internal
* "flattens" a string that was created through concatenation of multiple strings.
* Most engines will try to optimize concatenation of strings using a "rope" graph of the substrings,
* This can lead to increased memory usage with extra pointers and performance issues when operating on these strings.
* `string.charAt(0)` forces the engine to flatten the string before performing the operation.
* See https://en.wikipedia.org/wiki/Rope_(data_structure)
* See https://docs.google.com/document/d/1o-MJPAddpfBfDZCkIHNKbMiM86iDFld7idGbNQLuKIQ
*/
export function flattenString(str: string): string {
str.charAt(0);
return str;
}
2 changes: 1 addition & 1 deletion src/utils/web_byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ export const webByteUtils = {
return Uint8Array.from(buffer);
},

toHex(uint8array: Uint8Array): string {
toHex(uint8array: Uint8Array, _start?: number, _end?: number): string {
return Array.from(uint8array, byte => byte.toString(16).padStart(2, '0')).join('');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this call .subarray on the unint8Array with start/end? Otherwise won't this return a different result than the Node.js version of this API?

},

Expand Down
10 changes: 2 additions & 8 deletions test/node/bson_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1661,18 +1661,12 @@ describe('BSON', function () {
expect(__id).to.equal(a.toHexString());

// number
var genTime = a.generationTime;
a = new ObjectId(genTime);
a = new ObjectId(Date.now());
__id = a.__id;
expect(__id).to.equal(a.toHexString());

// generationTime
delete a.__id;
a.generationTime = genTime;
expect(__id).to.equal(a.toHexString());

// createFromTime
a = ObjectId.createFromTime(genTime);
a = ObjectId.createFromTime(Date.now());
__id = a.__id;
expect(__id).to.equal(a.toHexString());
ObjectId.cacheHexString = false;
Expand Down
43 changes: 26 additions & 17 deletions test/node/object_id.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,31 @@ describe('ObjectId', function () {
done();
});

it('should correctly create ObjectId from valid Buffer and offset', function (done) {
if (!Buffer.from) return done();
let a = 'AAAAAAAAAAAAAAAAAAAAAAAA';
let b = new ObjectId(Buffer.from(`aaaa${a}aaaa`, 'hex'), 2);
let c = b.equals(a); // => false
expect(true).to.equal(c);

a = 'aaaaaaaaaaaaaaaaaaaaaaaa';
b = new ObjectId(Buffer.from(`AAAA${a}AAAA`, 'hex'), 2);
c = b.equals(a); // => true
expect(a).to.equal(b.toString());
expect(true).to.equal(c);
done();
});

it('should throw an error if invalid Buffer passed in', function () {
const a = Buffer.from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]);
expect(() => new ObjectId(a)).to.throw(BSONError);
});

it('should throw an error if invalid Buffer offset passed in', function () {
const a = Buffer.from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]);
expect(() => new ObjectId(a, 5)).to.throw(BSONError);
});

it('should correctly allow for node.js inspect to work with ObjectId', function (done) {
const a = 'AAAAAAAAAAAAAAAAAAAAAAAA';
const b = new ObjectId(a);
Expand Down Expand Up @@ -375,7 +395,7 @@ describe('ObjectId', function () {
*/
const oidString = '6b61666665656b6c61746368';
const oid = new ObjectId(oidString);
const oidKId = 'buffer';
const oidKId = '__id';
it('should return false for an undefined otherId', () => {
// otherId === undefined || otherId === null
expect(oid.equals(null)).to.be.false;
Expand Down Expand Up @@ -415,33 +435,21 @@ describe('ObjectId', function () {

it('should not rely on toString for otherIds that are instanceof ObjectId', () => {
// Note: the method access the symbol prop directly instead of the getter
const equalId = { toString: () => oidString + 'wrong', [oidKId]: oid.id };
const equalId = { toString: () => oidString + 'wrong', [oidKId]: oid.toHexString() };
Object.setPrototypeOf(equalId, ObjectId.prototype);
expect(oid.toString()).to.not.equal(equalId.toString());
expect(oid.equals(equalId)).to.be.true;
});

it('should use otherId[kId] Buffer for equality when otherId has _bsontype === ObjectId', () => {
let equalId = { _bsontype: 'ObjectId', [oidKId]: oid.id };

const propAccessRecord: string[] = [];
equalId = new Proxy(equalId, {
get(target, prop: string, recv) {
if (prop !== '_bsontype') {
propAccessRecord.push(prop);
}
return Reflect.get(target, prop, recv);
}
});
it('should use otherId[kId] for equality when otherId has _bsontype === ObjectId', () => {
const equalId = { _bsontype: 'ObjectId', [oidKId]: oid.toHexString() };

expect(oid.equals(equalId)).to.be.true;
// once for the 11th byte shortcut
// once for the total equality
expect(propAccessRecord).to.deep.equal([oidKId, oidKId]);
});
});

it('should return the same instance if a buffer is passed in', function () {
ObjectId.cacheBuffer = true;
const inBuffer = Buffer.from('00'.repeat(12), 'hex');

const outBuffer = new ObjectId(inBuffer);
Expand All @@ -452,6 +460,7 @@ describe('ObjectId', function () {
expect(inBuffer).to.deep.equal(outBuffer.id);
// class method equality
expect(Buffer.prototype.equals.call(inBuffer, outBuffer.id)).to.be.true;
ObjectId.cacheBuffer = false;
});

context('createFromHexString()', () => {
Expand Down