Skip to content

Commit 0b4d22a

Browse files
authored
fix: Validate input file for Vectorize inserts (#9048)
* fix: Validate input file for Vectorize inserts * VS-398: Throw user errors instead of logging error and returning early for Vectorize commands
1 parent 137d2da commit 0b4d22a

File tree

10 files changed

+93
-54
lines changed

10 files changed

+93
-54
lines changed

.changeset/stupid-paws-slide.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"wrangler": patch
3+
---
4+
5+
fix: Validate input file for Vectorize inserts

packages/wrangler/src/__tests__/vectorize/vectorize.test.ts

Lines changed: 23 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -300,13 +300,9 @@ describe("vectorize commands", () => {
300300

301301
await expect(
302302
runWrangler("vectorize create test-index --dimensions=1536")
303-
).resolves.toBeUndefined();
304-
305-
expect(std.err).toMatchInlineSnapshot(`
306-
"X [ERROR] You must provide both dimensions and a metric, or a known model preset when creating an index.
307-
308-
"
309-
`);
303+
).rejects.toThrowErrorMatchingInlineSnapshot(
304+
`[Error: 🚨 You must provide both dimensions and a metric, or a known model preset when creating an index.]`
305+
);
310306
});
311307

312308
it("should handle listing vectorize V1 indexes", async () => {
@@ -462,13 +458,12 @@ describe("vectorize commands", () => {
462458

463459
it("should log error when getByIds does not receive ids", async () => {
464460
mockVectorizeV2Request();
465-
await runWrangler("vectorize get-vectors test-index --ids");
466-
467-
expect(std.err).toMatchInlineSnapshot(`
468-
"X [ERROR] 🚨 Please provide valid vector identifiers.
469461

470-
"
471-
`);
462+
await expect(
463+
runWrangler("vectorize get-vectors test-index --ids")
464+
).rejects.toThrowErrorMatchingInlineSnapshot(
465+
`[Error: 🚨 Please provide valid vector identifiers.]`
466+
);
472467
});
473468

474469
it("should handle a deleteByIds on a vectorize index", async () => {
@@ -482,13 +477,12 @@ describe("vectorize commands", () => {
482477

483478
it("should log error when deleteByIds does not receive ids", async () => {
484479
mockVectorizeV2Request();
485-
await runWrangler("vectorize delete-vectors test-index --ids");
486-
487-
expect(std.err).toMatchInlineSnapshot(`
488-
"X [ERROR] 🚨 Please provide valid vector identifiers for deletion.
489480

490-
"
491-
`);
481+
await expect(
482+
runWrangler("vectorize delete-vectors test-index --ids")
483+
).rejects.toThrowErrorMatchingInlineSnapshot(
484+
`[Error: 🚨 Please provide valid vector identifiers for deletion.]`
485+
);
492486
});
493487

494488
it("should handle a query on a vectorize index", async () => {
@@ -551,30 +545,22 @@ describe("vectorize commands", () => {
551545

552546
it("should fail query when neither vector nor vector-id is provided", async () => {
553547
mockVectorizeV2RequestError();
554-
await runWrangler(
555-
"vectorize query test-index --top-k=2 --return-values=true"
548+
await expect(
549+
runWrangler("vectorize query test-index --top-k=2 --return-values=true")
550+
).rejects.toThrowErrorMatchingInlineSnapshot(
551+
`[Error: 🚨 Either vector or vector-id parameter must be provided, but not both.]`
556552
);
557-
expect(std.out).toMatchInlineSnapshot(`""`);
558-
559-
expect(std.err).toMatchInlineSnapshot(`
560-
"X [ERROR] 🚨 Either vector or vector-id parameter must be provided, but not both.
561-
562-
"
563-
`);
564553
});
565554

566555
it("should fail query when both vector and vector-id are provided", async () => {
567556
mockVectorizeV2RequestError();
568-
await runWrangler(
569-
"vectorize query test-index --vector 1 2 3 '4' --vector-id some-vector-id"
557+
await expect(
558+
runWrangler(
559+
"vectorize query test-index --vector 1 2 3 '4' --vector-id some-vector-id"
560+
)
561+
).rejects.toThrowErrorMatchingInlineSnapshot(
562+
`[Error: 🚨 Either vector or vector-id parameter must be provided, but not both.]`
570563
);
571-
expect(std.out).toMatchInlineSnapshot(`""`);
572-
573-
expect(std.err).toMatchInlineSnapshot(`
574-
"X [ERROR] 🚨 Either vector or vector-id parameter must be provided, but not both.
575-
576-
"
577-
`);
578564
});
579565

580566
it("should fail query with invalid return-metadata flag", async () => {

packages/wrangler/src/__tests__/vectorize/vectorize.upsert.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,4 +222,22 @@ describe("dataset upsert", () => {
222222
✅ Successfully enqueued 5 vectors into index 'my-index' for upsertion."
223223
`);
224224
});
225+
226+
it("should reject an invalid file param", async () => {
227+
await expect(
228+
runWrangler("vectorize upsert my-index --file invalid_vectors.ndjson")
229+
).rejects.toThrowErrorMatchingInlineSnapshot(
230+
`[Error: 🚨 Cannot read invalid or empty file: invalid_vectors.ndjson.]`
231+
);
232+
});
233+
234+
it("should reject an empty file param", async () => {
235+
writeFileSync("empty_vectors.ndjson", "");
236+
237+
await expect(
238+
runWrangler("vectorize upsert my-index --file empty_vectors.ndjson")
239+
).rejects.toThrowErrorMatchingInlineSnapshot(
240+
`[Error: 🚨 Cannot read invalid or empty file: empty_vectors.ndjson.]`
241+
);
242+
});
225243
});

packages/wrangler/src/vectorize/common.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { access, constants, stat } from "node:fs/promises";
12
import { logger } from "../logger";
23
import type { Interface as RLInterface } from "node:readline";
34

@@ -8,6 +9,20 @@ export const VECTORIZE_MAX_BATCH_SIZE = 5_000;
89
export const VECTORIZE_UPSERT_BATCH_SIZE = VECTORIZE_V1_MAX_BATCH_SIZE;
910
export const VECTORIZE_MAX_UPSERT_VECTOR_RECORDS = 100_000;
1011

12+
// Helper function to carry out initial validations to a file supplied for
13+
// Vectorize upserts/inserts. This function returns true only if the file exists,
14+
// can be read, and is non-empty.
15+
export async function isValidFile(path: string): Promise<boolean> {
16+
try {
17+
await access(path, constants.R_OK);
18+
19+
const fileStat = await stat(path);
20+
return fileStat.isFile() && fileStat.size > 0;
21+
} catch (err) {
22+
return false;
23+
}
24+
}
25+
1126
// helper method that reads an ndjson file line by line in batches. not this doesn't
1227
// actually do any parsing - that will be handled on the backend
1328
// https://nodejs.org/docs/latest-v16.x/api/readline.html#rlsymbolasynciterator

packages/wrangler/src/vectorize/create.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { configFileName, formatConfigSnippet } from "../config";
22
import { createCommand } from "../core/create-command";
3+
import { UserError } from "../errors";
34
import { logger } from "../logger";
45
import { createIndex } from "./client";
56
import { deprecatedV1DefaultFlag } from "./common";
@@ -76,10 +77,9 @@ export const vectorizeCreateCommand = createCommand({
7677
dimensions: args.dimensions,
7778
};
7879
} else {
79-
logger.error(
80-
"You must provide both dimensions and a metric, or a known model preset when creating an index."
80+
throw new UserError(
81+
"🚨 You must provide both dimensions and a metric, or a known model preset when creating an index."
8182
);
82-
return;
8383
}
8484

8585
if (args.deprecatedV1) {

packages/wrangler/src/vectorize/deleteByIds.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { createCommand } from "../core/create-command";
2+
import { UserError } from "../errors";
23
import { logger } from "../logger";
34
import { deleteByIds } from "./client";
45
import type { VectorizeVectorIds } from "./types";
@@ -26,8 +27,9 @@ export const vectorizeDeleteVectorsCommand = createCommand({
2627
positionalArgs: ["name"],
2728
async handler(args, { config }) {
2829
if (args.ids.length === 0) {
29-
logger.error("🚨 Please provide valid vector identifiers for deletion.");
30-
return;
30+
throw new UserError(
31+
"🚨 Please provide valid vector identifiers for deletion."
32+
);
3133
}
3234

3335
logger.log(`📋 Deleting vectors...`);

packages/wrangler/src/vectorize/getByIds.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { createCommand } from "../core/create-command";
2+
import { UserError } from "../errors";
23
import { logger } from "../logger";
34
import { getByIds } from "./client";
45
import type { VectorizeVectorIds } from "./types";
@@ -26,8 +27,7 @@ export const vectorizeGetVectorsCommand = createCommand({
2627
positionalArgs: ["name"],
2728
async handler(args, { config }) {
2829
if (args.ids.length === 0) {
29-
logger.error("🚨 Please provide valid vector identifiers.");
30-
return;
30+
throw new UserError("🚨 Please provide valid vector identifiers.");
3131
}
3232

3333
logger.log(`📋 Fetching vectors...`);

packages/wrangler/src/vectorize/insert.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ import { createReadStream } from "node:fs";
22
import { createInterface } from "node:readline";
33
import { File, FormData } from "undici";
44
import { createCommand } from "../core/create-command";
5+
import { UserError } from "../errors";
56
import { logger } from "../logger";
67
import { insertIntoIndex, insertIntoIndexV1 } from "./client";
78
import {
89
deprecatedV1DefaultFlag,
910
getBatchFromFile,
11+
isValidFile,
1012
VECTORIZE_MAX_BATCH_SIZE,
1113
VECTORIZE_MAX_UPSERT_VECTOR_RECORDS,
1214
VECTORIZE_UPSERT_BATCH_SIZE,
@@ -54,24 +56,28 @@ export const vectorizeInsertCommand = createCommand({
5456
},
5557
positionalArgs: ["name"],
5658
async handler(args, { config }) {
59+
if (!(await isValidFile(args.file))) {
60+
throw new UserError(
61+
`🚨 Cannot read invalid or empty file: ${args.file}.`
62+
);
63+
}
64+
5765
const rl = createInterface({ input: createReadStream(args.file) });
5866

5967
if (
6068
args.deprecatedV1 &&
6169
Number(args.batchSize) > VECTORIZE_V1_MAX_BATCH_SIZE
6270
) {
63-
logger.error(
71+
throw new UserError(
6472
`🚨 Vectorize currently limits upload batches to ${VECTORIZE_V1_MAX_BATCH_SIZE} records at a time.`
6573
);
66-
return;
6774
} else if (
6875
!args.deprecatedV1 &&
6976
Number(args.batchSize) > VECTORIZE_MAX_BATCH_SIZE
7077
) {
71-
logger.error(
72-
`🚨 The global rate limit for the Cloudflare API is 1200 requests per five minutes. Vectorize V2 indexes currently limit upload batches to ${VECTORIZE_MAX_BATCH_SIZE} records at a time to stay within the service limits`
78+
throw new UserError(
79+
`🚨 The global rate limit for the Cloudflare API is 1200 requests per five minutes. Vectorize V2 indexes currently limit upload batches to ${VECTORIZE_MAX_BATCH_SIZE} records at a time to stay within the service limits.`
7380
);
74-
return;
7581
}
7682

7783
let vectorInsertCount = 0;

packages/wrangler/src/vectorize/query.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { createCommand } from "../core/create-command";
2+
import { UserError } from "../errors";
23
import { logger } from "../logger";
34
import { queryIndexByVector, queryIndexByVectorId } from "./client";
45
import type {
@@ -115,10 +116,9 @@ export const vectorizeQueryCommand = createCommand({
115116
(args.vector === undefined && args.vectorId === undefined) ||
116117
(args.vector !== undefined && args.vectorId !== undefined)
117118
) {
118-
logger.error(
119+
throw new UserError(
119120
"🚨 Either vector or vector-id parameter must be provided, but not both."
120121
);
121-
return;
122122
}
123123

124124
logger.log(`📋 Searching for relevant vectors...`);

packages/wrangler/src/vectorize/upsert.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ import { createReadStream } from "node:fs";
22
import { createInterface } from "node:readline";
33
import { File, FormData } from "undici";
44
import { createCommand } from "../core/create-command";
5+
import { UserError } from "../errors";
56
import { logger } from "../logger";
67
import { upsertIntoIndex } from "./client";
78
import {
89
getBatchFromFile,
10+
isValidFile,
911
VECTORIZE_MAX_BATCH_SIZE,
1012
VECTORIZE_MAX_UPSERT_VECTOR_RECORDS,
1113
} from "./common";
@@ -45,13 +47,18 @@ export const vectorizeUpsertCommand = createCommand({
4547
},
4648
positionalArgs: ["name"],
4749
async handler(args, { config }) {
50+
if (!(await isValidFile(args.file))) {
51+
throw new UserError(
52+
`🚨 Cannot read invalid or empty file: ${args.file}.`
53+
);
54+
}
55+
4856
const rl = createInterface({ input: createReadStream(args.file) });
4957

5058
if (Number(args.batchSize) > VECTORIZE_MAX_BATCH_SIZE) {
51-
logger.error(
52-
`🚨 The global rate limit for the Cloudflare API is 1200 requests per five minutes. Vectorize indexes currently limit upload batches to ${VECTORIZE_MAX_BATCH_SIZE} records at a time to stay within the service limits`
59+
throw new UserError(
60+
`🚨 The global rate limit for the Cloudflare API is 1200 requests per five minutes. Vectorize indexes currently limit upload batches to ${VECTORIZE_MAX_BATCH_SIZE} records at a time to stay within the service limits.`
5361
);
54-
return;
5562
}
5663

5764
let vectorUpsertCount = 0;

0 commit comments

Comments
 (0)