-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseed.ts
50 lines (43 loc) · 1.06 KB
/
seed.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import dotenv from "dotenv"
dotenv.config()
import csv from "csv-parser"
import fs from "fs"
import { Index } from "@upstash/vector"
// Connection settings for the Upstash Vector index, read from the environment
// (populated by the dotenv.config() call at the top of the file).
const upstashConfig = {
  url: process.env.UPSTASH_VECTOR_REST_URL,
  token: process.env.UPSTASH_VECTOR_REST_TOKEN,
};

// Shared vector-index client used by the seeding routine below.
const index = new Index(upstashConfig);
/** One record read from the training CSV. Only the `text` field is used by this script. */
interface Row {
  /** Raw text of the record; sent both as the upsert `data` and as metadata. */
  text: string;
}
/**
 * Reads a comma-separated file and collects every record emitted by the
 * csv-parser stream into an in-memory array.
 *
 * @param filePath Path of the CSV file to read.
 * @returns A promise resolving to all parsed rows once the stream ends.
 *   Rows are not validated; they are assumed to match {@link Row}.
 * @throws Rejects if the file cannot be opened/read or if the parser stream
 *   emits an error.
 */
async function parseCSV(filePath: string): Promise<Row[]> {
  return new Promise((resolve, reject) => {
    const rows: Row[] = [];

    fs.createReadStream(filePath)
      // `pipe()` does not forward errors from the source stream, so a missing
      // or unreadable file must be handled on the file stream itself;
      // otherwise the promise would never settle.
      .on("error", reject)
      .pipe(csv({ separator: "," }))
      .on("data", (row: Row) => {
        rows.push(row);
      })
      .on("error", reject)
      .on("end", () => {
        resolve(rows);
      });
  });
}
// Number of rows sent to the vector index per upsert call.
const STEP = 30;

/**
 * Loads the training CSV and upserts its rows into the vector index in
 * consecutive batches of STEP rows. Each record's id is its zero-based
 * position in the parsed file; the row text is sent both as the `data`
 * field and as `metadata.text`.
 */
const seed = async () => {
  const rows = await parseCSV("dataset/training_data.csv");

  let offset = 0;
  while (offset < rows.length) {
    const batch = rows
      .slice(offset, offset + STEP)
      .map(({ text }, position) => ({
        data: text,
        id: offset + position,
        metadata: { text },
      }));

    // Batches are awaited one at a time, so upserts run sequentially.
    await index.upsert(batch);
    offset += STEP;
  }
};
// Script entry point: run the seeding routine and handle a rejected promise
// explicitly instead of leaving it floating, reporting the failure and
// signalling it through a non-zero exit code.
seed().catch((err: unknown) => {
  console.error("Seeding failed:", err);
  process.exitCode = 1;
});