Skip to content

Commit

Permalink
test(NODE-6534): add spec test runner for Binary vector (#729)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbbeeken authored Nov 15, 2024
1 parent f99fdfd commit f6e86bb
Show file tree
Hide file tree
Showing 5 changed files with 489 additions and 0 deletions.
253 changes: 253 additions & 0 deletions test/node/bson_binary_vector.spec.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import * as fs from 'fs';
import * as path from 'path';
import { BSON, BSONError, Binary } from '../register-bson';
import { expect } from 'chai';

const { toHex, fromHex } = BSON.onDemand.ByteUtils;

// Endianness probe: store -1 as a float64 and look at the raw bytes it produces.
//   Little endian: [0, 0, 0, 0, 0, 0, 240, 191]
//   Big endian:    [191, 240, 0, 0, 0, 0, 0, 0]
const FLOAT = Float64Array.of(-1);
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer);

// The sign/exponent byte (191) leads on big endian hosts; on little endian the first byte is 0.
const isBigEndian = FLOAT_BYTES[0] !== 0;

/** Hex string of the vector dtype byte: '0x03' is int8, '0x27' is float32, '0x10' is packed_bit. */
type VectorHexType = '0x03' | '0x27' | '0x10';
/** One test case as it appears in a spec JSON file. */
type VectorTest = {
// Used in the mocha test title and as the lookup key into invalidTestExpectedError.
description: string;
// Element values; float32 cases may use the strings 'inf' / '-inf' (see fixFloats).
vector: (number | string)[];
// Splits the suite into the 'valid' and 'invalid' groups.
valid: boolean;
// Selects the element encoding in make() and is written to the first byte of the binary.
dtype_hex: VectorHexType;
// Written to the second byte of the binary; make() treats a missing value as 0.
padding?: number;
// Hex of the expected serialized document; read only by the valid-case tests.
canonical_bson?: string;
};
/** One spec JSON file: `test_key` is the document key the vector is serialized under. */
type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };

/**
 * Checks the two header bytes (dtype, padding) of a vector Binary.
 *
 * Binaries whose sub_type is not 9 are ignored. Missing header bytes are read as 0.
 *
 * @param vector - the Binary to inspect
 * @throws BSONError when the padding byte is not allowed for the dtype byte
 */
function validateVector(vector: Binary): void {
  if (vector.sub_type !== 9) return;

  const INT8 = 0x03;
  const FLOAT32 = 0x27;
  const PACKED_BIT = 0x10;

  const byteLength = vector.position;
  const dataType = vector.buffer[0] ?? 0;
  const paddingBits = vector.buffer[1] ?? 0;

  switch (dataType) {
    case INT8:
    case FLOAT32:
      if (paddingBits !== 0) {
        throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
      }
      return;

    case PACKED_BIT: {
      // Only the two header bytes are present, so there is no data to pad.
      const hasNoData = byteLength === 2;
      if (hasNoData && paddingBits !== 0) {
        throw new BSONError(
          'Invalid Vector: padding must be zero for packed bit vectors that are empty'
        );
      }
      if (paddingBits > 7) {
        throw new BSONError(
          `Invalid Vector: padding must be a value between 0 and 7. found: ${paddingBits}`
        );
      }
      return;
    }
  }
}

/**
 * Converts a float32 spec value into a JS number.
 *
 * @param f - a number, or one of the strings 'inf' / '-inf'
 * @returns the number unchanged, or the matching infinity
 * @throws Error when given any other string
 */
function fixFloats(f: string | number): number {
  switch (f) {
    case 'inf':
      return Number.POSITIVE_INFINITY;
    case '-inf':
      return Number.NEGATIVE_INFINITY;
    default:
      if (typeof f === 'number') return f;
      throw new Error(`test format error: unknown float value: ${f}`);
  }
}

/**
 * Validates one int8 spec value before it is stored in an Int8Array.
 *
 * @param f - the raw value from the spec file
 * @returns `f` unchanged when it is an integer in [-128, 127]
 * @throws Error starting with 'test format error' when `f` is not a number
 * @throws Error starting with 'unsupported_error' when `f` is out of range or not an integer;
 *   the runner skips cases that fail with this prefix
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');

  if (f < -128 || f > 127) {
    // Javascript Int8Array only supports values from -128 to 127
    throw new Error(`unsupported_error: int8 out of range: ${f}`);
  }
  if (!Number.isSafeInteger(f)) {
    // Int8Array would silently truncate fractions (and turn NaN into 0), so reject them
    // the same way fixBits rejects non-integers.
    throw new Error(`unsupported_error: int8 must be an integer: ${f}`);
  }
  return f;
}

/**
 * Validates one packed_bit spec value (a whole byte of bits).
 *
 * @param f - the raw value from the spec file
 * @returns `f` unchanged when it is an integer in [0, 255]
 * @throws Error starting with 'test format error' when `f` is not a number
 * @throws Error starting with 'unsupported_error' when `f` is not an integer in [0, 255]
 */
function fixBits(f: number | string): number {
  if (typeof f === 'number') {
    // Javascript Uint8Array only supports values from 0 to 255
    const fitsInByte = Number.isSafeInteger(f) && f >= 0 && f <= 255;
    if (fitsInByte) return f;
    throw new Error(`unsupported_error: bit out of range: ${f}`);
  }

  throw new Error('test format error: unexpected test data');
}

/**
 * Builds a sub_type 9 Binary from spec test data.
 *
 * Layout of the resulting bytes: [dtype, padding, ...encoded elements].
 *
 * @param vector - raw element values from the spec file
 * @param dtype_hex - selects the element encoding and is written to byte 0
 * @param padding - written to byte 1; defaults to 0 when omitted
 * @returns the assembled Binary
 * @throws Error from the fix* helpers on bad element values, or on an unknown dtype_hex
 */
function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
  let payload: Uint8Array;

  switch (dtype_hex) {
    case '0x03' /* int8 */:
      payload = new Uint8Array(new Int8Array(vector.map(fixInt8s)).buffer);
      break;

    case '0x10' /* packed_bit */:
      payload = new Uint8Array(new Int8Array(vector.map(fixBits)).buffer);
      break;

    case '0x27' /* float32 */: {
      const floats = new Float32Array(vector.map(fixFloats));
      payload = new Uint8Array(floats.buffer);
      if (isBigEndian) {
        // The encoded floats are little endian, so flip each 4 byte element in place.
        for (let offset = 0; offset < payload.length; offset += 4) {
          payload.subarray(offset, offset + 4).reverse();
        }
      }
      break;
    }

    default:
      throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
  }

  // Two header bytes precede the element data.
  const bytes = new Uint8Array(payload.byteLength + 2);
  bytes.set(payload, 2);

  const binary = new Binary(bytes, 9);
  binary.buffer[0] = +dtype_hex;
  binary.buffer[1] = padding ?? 0;
  return binary;
}

/**
 * Maps an invalid test's description to the error message it must produce,
 * or to `false` when the case is expected to be skipped.
 */
const invalidTestExpectedError = new Map();
{
  const paddingMustBeZero = 'Invalid Vector: padding must be zero for int8 and float32 vectors';
  const emptyMustNotBePadded =
    'Invalid Vector: padding must be zero for packed bit vectors that are empty';
  const paddingOutOfRange = 'Invalid Vector: padding must be a value between 0 and 7';

  // A description that is repeated in the spec files only needs one entry here.
  const expectations: ReadonlyArray<readonly [string, string | false]> = [
    ['FLOAT32 with padding', paddingMustBeZero],
    ['INT8 with padding', paddingMustBeZero],
    ['Padding specified with no vector data PACKED_BIT', emptyMustNotBePadded],
    ['Exceeding maximum padding PACKED_BIT', paddingOutOfRange],
    ['Negative padding PACKED_BIT', paddingOutOfRange],
    // skipped
    ['Overflow Vector PACKED_BIT', false],
    ['Underflow Vector PACKED_BIT', false],
    ['Overflow Vector INT8', false],
    ['Underflow Vector INT8', false],
    ['INT8 with float inputs', false],
    ['Vector with float values PACKED_BIT', false]
  ];

  for (const [description, expected] of expectations) {
    invalidTestExpectedError.set(description, expected);
  }
}

/**
 * Spec test runner for Binary vectors (sub_type 9).
 *
 * Every JSON file in specs/bson-binary-vector is a suite. Each suite's cases are split into
 * a 'valid' group (encode / decode against canonical_bson) and an 'invalid' group (encoding
 * must raise a BSONError carrying the message registered in invalidTestExpectedError).
 */
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
  const tests: Record<string, VectorSuite> = Object.create(null);

  for (const file of fs.readdirSync(specDirectory)) {
    // Only JSON files are suites; anything else in the directory would fail JSON.parse.
    if (path.extname(file) !== '.json') continue;
    tests[path.basename(file, '.json')] = JSON.parse(
      fs.readFileSync(path.join(specDirectory, file), 'utf8')
    );
  }

  /** Encoders every invalid case is run through, in the order their tests are registered. */
  const encoders = [
    // TODO(NODE-6537): vector validation MUST be a part of serialize
    { label: 'bson', encode: (bin: Binary): unknown => BSON.serialize({ bin }) },
    // TODO(NODE-6537): vector validation MUST be a part of stringify
    { label: 'extended json', encode: (bin: Binary): unknown => BSON.EJSON.stringify({ bin }) }
  ];

  /** Returns the lowercase canonical hex of a valid case, failing loudly when it is absent. */
  function canonicalHexOf(test: VectorTest): string {
    if (test.canonical_bson == null) {
      throw new Error(`test format error: valid test is missing canonical_bson`);
    }
    return test.canonical_bson.toLowerCase();
  }

  /** Builds, encodes and validates the vector of `test`; returns whatever was thrown, if anything. */
  function captureEncodeError(test: VectorTest, encode: (bin: Binary) => unknown): unknown {
    try {
      const bin = make(test.vector, test.dtype_hex, test.padding);
      encode(bin);
      validateVector(bin);
    } catch (error) {
      return error;
    }
    return undefined;
  }

  for (const [suiteName, suite] of Object.entries(tests)) {
    describe(suiteName, function () {
      const valid = suite.tests.filter(t => t.valid);
      const invalid = suite.tests.filter(t => !t.valid);

      describe('valid', function () {
        /**
         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const test of valid) {
          it(`encode ${test.description}`, function () {
            const bin = make(test.vector, test.dtype_hex, test.padding);

            const buffer = BSON.serialize({ [suite.test_key]: bin });
            expect(toHex(buffer)).to.equal(canonicalHexOf(test));
          });

          it(`decode ${test.description}`, function () {
            const doc = BSON.deserialize(fromHex(canonicalHexOf(test)));
            const decoded = doc[suite.test_key];

            expect(decoded.sub_type).to.equal(0x09);
            expect(decoded.buffer[0]).to.equal(+test.dtype_hex);
            // padding is optional in the test data; make() encodes a missing value as 0.
            expect(decoded.buffer[1]).to.equal(test.padding ?? 0);
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const test of invalid) {
          const expectedErrorMessage = invalidTestExpectedError.get(test.description);

          for (const { label, encode } of encoders) {
            it(`${label}: ${test.description}`, function () {
              const thrownError = captureEncodeError(test, encode);

              // The test helpers raise 'unsupported_error' for inputs typed arrays cannot hold;
              // those cases must be registered as skipped (false) and are not run further.
              if (
                thrownError instanceof Error &&
                thrownError.message.startsWith('unsupported_error')
              ) {
                expect(
                  expectedErrorMessage,
                  'We expect a certain error message but got an unsupported error'
                ).to.be.false;
                this.skip();
              }

              expect(thrownError).to.be.instanceOf(BSONError);
              // Without this check a missing (or skipped) entry would match any error message.
              expect(
                expectedErrorMessage,
                `no expected error message is registered for invalid test "${test.description}"`
              ).to.be.a('string');
              expect((thrownError as Error).message).to.include(expectedErrorMessage);
            });
          }
        }
      });
    });
  }
});
51 changes: 51 additions & 0 deletions test/node/specs/bson-binary-vector/float32.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
"test_key": "vector",
"tests": [
{
"description": "Simple Vector FLOAT32",
"valid": true,
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
},
{
"description": "Vector with decimals and negative value FLOAT32",
"valid": true,
"vector": [127.7, -7.7],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
},
{
"description": "Empty Vector FLOAT32",
"valid": true,
"vector": [],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009270000"
},
{
"description": "Infinity Vector FLOAT32",
"valid": true,
"vector": ["-inf", 0.0, "inf"],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
},
{
"description": "FLOAT32 with padding",
"valid": false,
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 3
}
]
}

57 changes: 57 additions & 0 deletions test/node/specs/bson-binary-vector/int8.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
"test_key": "vector",
"tests": [
{
"description": "Simple Vector INT8",
"valid": true,
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0,
"canonical_bson": "1600000005766563746F7200040000000903007F0700"
},
{
"description": "Empty Vector INT8",
"valid": true,
"vector": [],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009030000"
},
{
"description": "Overflow Vector INT8",
"valid": false,
"vector": [128],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0
},
{
"description": "Underflow Vector INT8",
"valid": false,
"vector": [-129],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0
},
{
"description": "INT8 with padding",
"valid": false,
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 3
},
{
"description": "INT8 with float inputs",
"valid": false,
"vector": [127.77, 7.77],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 0
}
]
}

Loading

0 comments on commit f6e86bb

Please sign in to comment.