@@ -2,13 +2,29 @@ import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-d
22import { BSON , type Document } from "bson" ;
33import type { UserConfig } from "../config.js" ;
44import type { ConnectionManager } from "../connectionManager.js" ;
5+ import z from "zod" ;
6+
/** Zod schema accepting the similarity names `"cosine"`, `"euclidean"` and `"dotProduct"`. */
export const similarityEnum = z.enum(["cosine", "euclidean", "dotProduct"]);

/** String-literal union inferred from {@link similarityEnum}. */
export type Similarity = z.infer<typeof similarityEnum>;

/** Zod schema accepting the quantization names `"none"`, `"scalar"` and `"binary"`. */
export const quantizationEnum = z.enum(["none", "scalar", "binary"]);

/** String-literal union inferred from {@link quantizationEnum}. */
export type Quantization = z.infer<typeof quantizationEnum>;
512
/**
 * One `type: "vector"` field entry taken from the definition of a
 * vector search index.
 */
export type VectorFieldIndexDefinition = {
    /** Discriminator identifying the entry as a vector field. */
    type: "vector";
    /** Dot-separated path of the field inside a document. */
    path: string;
    /** Number of dimensions the indexed vector is expected to have. */
    numDimensions: number;
    /** Quantization configured for the field. */
    quantization: Quantization;
    /** Similarity function configured for the field. */
    similarity: Similarity;
};
20+
/**
 * Describes why a document field does not match the vector field
 * definition it was validated against.
 */
export type VectorFieldValidationError = {
    /** Dot-separated path of the offending field (copied from the definition). */
    path: string;
    /** Dimension count required by the definition. */
    expectedNumDimensions: number;
    /** Quantization required by the definition. */
    expectedQuantization: Quantization;
    /** Dimension count found in the document, or `"unknown"` when it could not be determined. */
    actualNumDimensions: number | "unknown";
    /** Quantization attributed to the document value, or `"unknown"` when it could not be determined. */
    actualQuantization: Quantization | "unknown";
    /** Category of the validation failure. */
    error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";
};
1329
/**
 * A namespace string made of two parts joined by a single dot,
 * e.g. `"database.collection"`.
 *
 * The placeholders must not contain padding whitespace: spaces inside the
 * template literal are part of the type and would be required verbatim.
 */
export type EmbeddingNamespace = `${string}.${string}`;
@@ -54,7 +70,7 @@ export class VectorSearchEmbeddingsManager {
5470 const vectorSearchIndexes = allSearchIndexes . filter ( ( index ) => index . type === "vectorSearch" ) ;
5571 const vectorFields = vectorSearchIndexes
5672 // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
57- . flatMap < Document > ( ( index ) => ( index . latestDefinition ?. fields as Document ) ?? [ ] )
73+ . flatMap < Document > ( ( index ) => ( index . latestDefinition ?. fields as Document [ ] ) ?? [ ] )
5874 . filter ( ( field ) => this . isVectorFieldIndexDefinition ( field ) ) ;
5975
6076 this . embeddings . set ( embeddingDefKey , vectorFields ) ;
@@ -73,7 +89,7 @@ export class VectorSearchEmbeddingsManager {
7389 collection : string ;
7490 } ,
7591 document : Document
76- ) : Promise < VectorFieldIndexDefinition [ ] > {
92+ ) : Promise < VectorFieldValidationError [ ] > {
7793 const provider = await this . assertAtlasSearchIsAvailable ( ) ;
7894 if ( ! provider ) {
7995 return [ ] ;
@@ -87,15 +103,15 @@ export class VectorSearchEmbeddingsManager {
87103 }
88104
89105 const embeddings = await this . embeddingsForNamespace ( { database, collection } ) ;
90- return embeddings . filter ( ( emb ) => ! this . documentPassesEmbeddingValidation ( emb , document ) ) ;
106+ return embeddings
107+ . map ( ( emb ) => this . getValidationErrorForDocument ( emb , document ) )
108+ . filter ( ( e ) => e !== undefined ) ;
91109 }
92110
93111 private async assertAtlasSearchIsAvailable ( ) : Promise < NodeDriverServiceProvider | null > {
94112 const connectionState = this . connectionManager . currentConnectionState ;
95- if ( connectionState . tag === "connected" ) {
96- if ( await connectionState . isSearchSupported ( ) ) {
97- return connectionState . serviceProvider ;
98- }
113+ if ( connectionState . tag === "connected" && ( await connectionState . isSearchSupported ( ) ) ) {
114+ return connectionState . serviceProvider ;
99115 }
100116
101117 return null ;
@@ -105,56 +121,99 @@ export class VectorSearchEmbeddingsManager {
105121 return doc [ "type" ] === "vector" ;
106122 }
107123
/**
 * Checks the value found at `definition.path` inside `document` against the
 * vector field definition.
 *
 * @param definition - Vector field definition to validate against.
 * @param document - Document whose field is inspected.
 * @returns `undefined` when the field is absent from the document, when the
 *   definition's quantization is `"none"`, or when the value passes every
 *   check; otherwise a {@link VectorFieldValidationError} describing the
 *   first failed check.
 */
private getValidationErrorForDocument(
    definition: VectorFieldIndexDefinition,
    document: Document
): VectorFieldValidationError | undefined {
    const { path, numDimensions, quantization } = definition;

    // Builds the error payload; anything not supplied is reported as "unknown".
    const fail = (
        error: VectorFieldValidationError["error"],
        actualNumDimensions: VectorFieldValidationError["actualNumDimensions"] = "unknown",
        actualQuantization: VectorFieldValidationError["actualQuantization"] = "unknown"
    ): VectorFieldValidationError => ({
        path,
        expectedNumDimensions: numDimensions,
        expectedQuantization: quantization,
        actualNumDimensions,
        actualQuantization,
        error,
    });

    // Walk the dotted path; a missing segment means there is nothing to validate.
    let current: unknown = document;
    for (const segment of path.split(".")) {
        if (!current || typeof current !== "object" || !(segment in current)) {
            return undefined;
        }
        current = (current as Record<string, unknown>)[segment];
    }

    // With "none" the user did not pick a quantization, so whatever format
    // they store is trusted. Only "scalar" and "binary" are checked.
    if (quantization !== "scalar" && quantization !== "binary") {
        return undefined;
    }

    if (current instanceof BSON.Binary) {
        // Try to read the binary as float32 elements first, then as bits.
        let binaryLength: number;
        try {
            binaryLength = current.toFloat32Array().length;
        } catch {
            try {
                binaryLength = current.toBits().length;
            } catch {
                // Neither representation could be read from the binary value.
                return fail("not-a-vector", undefined, "binary");
            }
        }

        return binaryLength === numDimensions
            ? undefined
            : fail("dimension-mismatch", binaryLength, "binary");
    }

    // Anything that is not a BSON binary must be a plain array of numbers.
    if (!Array.isArray(current)) {
        return fail("not-a-vector");
    }

    if (current.length !== numDimensions) {
        return fail("dimension-mismatch", current.length, "scalar");
    }

    if (current.some((element) => !this.isANumber(element))) {
        return fail("not-numeric", current.length, "scalar");
    }

    return undefined;
}
159218
160219 private isANumber ( value : unknown ) : boolean {
0 commit comments