11import { expect } from 'chai' ;
2+ import * as net from 'net' ;
23import * as sinon from 'sinon' ;
4+ import { inspect } from 'util' ;
35
46import {
57 BSON ,
8+ BSONError ,
9+ type Collection ,
10+ deserialize ,
611 type MongoClient ,
7- MongoDBResponse ,
812 MongoServerError ,
13+ OnDemandDocument ,
914 OpMsgResponse
1015} from '../../../mongodb' ;
1116
@@ -23,12 +28,12 @@ describe('class MongoDBResponse', () => {
2328 let bsonSpy : sinon . SinonSpy ;
2429
2530 beforeEach ( ( ) => {
26- bsonSpy = sinon . spy ( MongoDBResponse . prototype , 'parseBsonSerializationOptions' ) ;
31+ // @ts -expect-error private function
32+ bsonSpy = sinon . spy ( OnDemandDocument . prototype , 'parseBsonSerializationOptions' ) ;
2733 } ) ;
2834
2935 afterEach ( ( ) => {
3036 bsonSpy ?. restore ( ) ;
31- // @ts -expect-error: Allow this to be garbage collected
3237 bsonSpy = null ;
3338 } ) ;
3439
@@ -153,3 +158,180 @@ describe('class MongoDBResponse', () => {
153158 }
154159 ) ;
155160} ) ;
161+
162+ describe ( 'utf8 validation with cursors' , function ( ) {
163+ let client : MongoClient ;
164+ let collection : Collection ;
165+
/**
 * Stores a document whose string field contains bytes that are not valid utf8.
 *
 * `net.Socket.prototype.write` is stubbed while `collection.insertOne({ field: 'é'.repeat(8) })`
 * runs. Every outgoing buffer is hex-encoded and searched for the hex encoding of the field value.
 * The buffer that contains it is rewritten so those bytes become `c301` repeated 8 times (same
 * length, no longer valid utf8) and is then passed to the real `write`. All other buffers are
 * forwarded unchanged.
 *
 * The stub throws if a single buffer contains the target byte sequence more than once.
 */
async function insertDocumentWithInvalidUTF8() {
  const doc = { field: 'é'.repeat(8) };

  const writeStub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...writeArgs) {
    const outgoingHex = writeArgs[0].toString('hex');
    const needleHex = Buffer.from(doc.field, 'utf-8').toString('hex');

    // Not the insert payload: pass it straight through to the real socket.write.
    if (!outgoingHex.includes(needleHex)) {
      return writeStub.wrappedMethod.apply(this, writeArgs);
    }

    // Exactly one occurrence splits the hex string into exactly two pieces.
    const pieceCount = outgoingHex.split(needleHex).length;
    if (pieceCount !== 2) {
      sinon.restore();
      const message = `too many target bytes sequences: received ${pieceCount}\n. command: ${inspect(deserialize(writeArgs[0]), { depth: Infinity })}`;
      throw new Error(message);
    }

    const corruptedHex = outgoingHex.replace(needleHex, 'c301'.repeat(8));
    const written = writeStub.wrappedMethod.apply(this, [Buffer.from(corruptedHex, 'hex')]);
    // The payload has been tampered with; later writes must reach the real socket.write.
    sinon.restore();
    return written;
  });

  await collection.insertOne(doc);

  sinon.restore();
}
199+
// Per-test setup: connect a new client, empty `test.invalidutf`, then seed it with one
// document containing invalid utf8 bytes.
beforeEach(async function () {
  client = this.configuration.newClient();
  await client.connect();

  collection = client.db('test').collection('invalidutf');
  await collection.deleteMany({});

  await insertDocumentWithInvalidUTF8();
});
209+
// Per-test teardown: undo any sinon fakes that are still installed, then close the client.
afterEach(async function () {
  sinon.restore();
  // `client` is still unassigned if client creation failed in beforeEach; guard so the
  // teardown does not add a TypeError on top of the original failure.
  await client?.close();
});
214+
// With validation switched off, every cursor consumption API must be able to read the
// document containing invalid utf8 without throwing.
context('when utf-8 validation is explicitly disabled', function () {
  /** Opens a new cursor over the whole collection with utf8 validation turned off. */
  const openCursor = () => collection.find({}, { enableUtf8Validation: false });

  it('documents can be read using a for-await loop without errors', async function () {
    for await (const _doc of openCursor()) {
      // Nothing to check per document: completing the iteration is the assertion.
    }
  });

  it('documents can be read using next() without errors', async function () {
    const unvalidated = openCursor();

    while (await unvalidated.hasNext()) {
      await unvalidated.next();
    }
  });

  it('documents can be read using toArray() without errors', async function () {
    await openCursor().toArray();
  });

  it('documents can be read using .stream() without errors', async function () {
    await openCursor().stream().toArray();
  });

  it('documents can be read with tryNext() without error', async function () {
    const unvalidated = openCursor();

    while (await unvalidated.hasNext()) {
      await unvalidated.tryNext();
    }
  });
});
245+
/**
 * Asserts that `fn` rejects with a `BSONError` whose text reports an invalid UTF-8 string.
 *
 * The "did not reject" failure is raised outside the `try` block so that it is reported as
 * such, rather than being caught here and surfacing as a confusing message mismatch.
 *
 * @param fn - async callback that is expected to reject
 */
async function expectReject(fn: () => Promise<void>) {
  let rejected = false;
  let error: unknown;

  try {
    await fn();
  } catch (caught) {
    rejected = true;
    error = caught;
  }

  if (!rejected) {
    expect.fail('expected the provided callback function to reject, but it did not.');
  }

  expect(error).to.match(/Invalid UTF-8 string in BSON document/);
  expect(error).to.be.instanceOf(BSONError);
}
255+
// With validation switched on explicitly, every cursor consumption API must reject with a
// BSON error when it reaches the document containing invalid utf8.
context('when utf-8 validation is explicitly enabled', function () {
  /** Opens a new cursor over the whole collection with utf8 validation turned on. */
  const openCursor = () => collection.find({}, { enableUtf8Validation: true });

  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of openCursor()) {
        // Iteration itself is expected to throw.
      }
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const validated = openCursor();

      while (await validated.hasNext()) {
        await validated.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      await openCursor().toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      await openCursor().stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      const validated = openCursor();

      while (await validated.hasNext()) {
        await validated.tryNext();
      }
    });
  });
});
296+
// When `enableUtf8Validation` is not specified, every cursor consumption API must behave as
// if validation were enabled and reject with a BSON error on the invalid document.
// No test in this context may pass the option, otherwise the default is not what is tested.
context('utf-8 validation defaults to enabled', function () {
  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of collection.find({}));
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      // Deliberately no options: this test covers the default, not the explicit setting.
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.tryNext();
      }
    });
  });
});
337+ } ) ;
0 commit comments