-
Notifications
You must be signed in to change notification settings - Fork 0
/
db.js
111 lines (99 loc) · 3.31 KB
/
db.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
const Hive = require("./hive.js");
const path = require("path");
const config = require("./config.js");
const fs = require("fs");
const { exec, execFile } = require("child_process");
/**
 * Namespace for the vector-database helpers in this file.
 * The function body is intentionally empty: the helpers are attached to it as
 * properties (`vdb.init`, `vdb.query`, ...), and `vdb` itself is what the module exports.
 */
function vdb() {}
/**
 * Configure the wrapper, initialise the default vector DB and run a first query.
 *
 * State (`dbFile`, `index`, `useLlamaEmbedding`, `dataChannel`) is stored on `this`,
 * while the follow-up calls go through `vdb` directly, so this function is expected
 * to be invoked as `vdb.init(...)`.
 *
 * @param {string} [query="Human Factors Workscope"] - Text forwarded to `vdb.query`.
 * @returns {Promise<*>} Whatever `vdb.query(query)` resolves to.
 */
vdb.init = async function (query = "Human Factors Workscope") {
  this.dbFile = "./db/Documents/db.json";
  this.index = {};
  this.useLlamaEmbedding = false;

  // One entry per data source, keyed by the channel name used by the other helpers.
  this.dataChannel = new Map();
  this.dataChannel.set("Documents", {
    datastream: "Documents",
    datafolder: "./docs",
    slice: 1024,
    vectordb: "Documents.js",
  });
  this.dataChannel.set("MongoDB", {
    datastream: "MongoDB",
    database: "fortknox",
    collection: "clientlist",
    // The MongoDB connection-string scheme is the lowercase `mongodb://`.
    url: "mongodb://localhost:27017/",
    vectordb: "Mongodb.js",
    slice: 2000,
  });
  this.dataChannel.set("WebSearch", { datastream: "WebSearch", slice: 2000 });

  await vdb.initVectorDB();
  return await vdb.query(query);
};
/**
 * Initialise Hive for one data channel.
 *
 * Looks up the channel registered under `type` and calls `Hive.init` with the
 * index directory `<__dirname>/db/<datastream>`, the configured `dbFile` and the
 * channel's `datafolder`.
 *
 * @param {string} [type="Documents"] - Key into `this.dataChannel`.
 * @returns {Promise<void>}
 */
vdb.initVectorDB = async function (type = "Documents") {
  const { datastream, datafolder } = this.dataChannel.get(type);
  const indexDirectory = path.join(__dirname, "db", datastream);
  await Hive.init(indexDirectory, this.dbFile, datafolder);
};
/**
 * Embed `query`, search Hive for similar items and return the metadata of the
 * matches whose similarity exceeds 0.2.
 *
 * Each returned metadata object is a shallow copy whose `href` has been made
 * relative to the channel's `datafolder`. The objects handed back by `Hive.find`
 * are left untouched, so repeated queries keep producing the same paths.
 *
 * @param {string} query - Text to embed via `Hive.getVector`.
 * @param {string} [database="Documents"] - Key into `this.dataChannel`; only its
 *   `datafolder` is used here.
 * @returns {Promise<Object[]>} Metadata of the sufficiently similar matches.
 */
vdb.query = async function (query, database = "Documents") {
  const vector = await Hive.getVector(query, Hive.TransOptions);
  // Second argument is presumably the number of nearest items to fetch — TODO confirm in hive.js.
  const matches = await Hive.find(vector.data, 5);

  const results = [];
  for (const match of matches) {
    if (match.similarity > 0.2) {
      console.log(match.similarity);
      const meta = match.document.meta;
      // The channel is looked up only when there is a match, as before.
      const datafolder = this.dataChannel.get(database).datafolder;
      // Copy instead of overwriting meta.href in place: mutating the found
      // document would re-relativise an already relative path on the next query.
      results.push(Object.assign({}, meta, { href: path.relative(datafolder, meta.href) }));
    }
  }

  console.log(results);
  return results;
};
/**
 * Copy every record of the configured "MongoDB" channel into Hive.
 *
 * Loads the project's `./mgdb.js` module, instantiates it with the channel's
 * `url` and `database`, fetches all records of the channel's `collection`
 * (empty filter) and adds each one to Hive, one at a time, as the string
 * produced by `vdb.sentanceCompose`.
 *
 * @returns {Promise<void>}
 */
vdb.pullDatabase = async function () {
  const DatabaseClient = require("./mgdb.js");
  const { url, database, collection } = this.dataChannel.get("MongoDB");
  const client = new DatabaseClient(url, database);
  const records = await client.find(collection, {});
  for (const record of records) {
    // Sequential on purpose: each insert is awaited before the next starts.
    await Hive.addItem(vdb.sentanceCompose(record));
  }
};
/**
 * Create a summarization pipeline by calling `this.pipeline` with the
 * "summarization" task and the "Xenova/distilbart-cnn-6-6" model.
 *
 * NOTE(review): `pipeline` is not assigned anywhere in this file; it must be
 * attached to `vdb` elsewhere before this is called — confirm.
 *
 * @returns {Promise<*>} Whatever `this.pipeline(...)` resolves to.
 */
vdb.sumInit = async function () {
  const task = "summarization";
  const model = "Xenova/distilbart-cnn-6-6";
  const summarizer = await this.pipeline(task, model);
  return summarizer;
};
/**
 * Flatten a record into a single string of "key : value" pairs separated by spaces.
 *
 * Entries are skipped when the key contains the substring "id" or when the
 * value is an empty string, `null` or `undefined`.
 * NOTE(review): the substring test also drops keys such as "video" or "width" —
 * confirm that this is intended.
 *
 * @param {Object} data - Record whose own enumerable properties are serialised.
 * @returns {string} The joined pairs, or "" when nothing qualifies.
 */
vdb.sentanceCompose = function (data) {
  return Object.entries(data)
    .filter(([field]) => !field.includes("id"))
    .filter(([, content]) => content !== "" && content !== null && content !== undefined)
    .map(([field, content]) => `${field} : ${content}`)
    .join(" ");
};
/**
 * Compute an embedding for `text` by running the `llama-embedding` executable.
 *
 * The executable and model paths are derived from `config.llamacpp` by replacing
 * "llama-cli" with "llama-embedding" and "snowflake-q8_0.gguf" respectively.
 * The process is started with `execFile` and an argument array, so `text` is
 * passed verbatim as a single argument and is never interpreted by a shell.
 * This assumes `config.llamacpp` is a plain executable path — TODO confirm.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<{data: *}>} Resolves with the parsed JSON output under `data`.
 *   Rejects with a string of the form "error: <message>" when the process fails,
 *   when the output contains "nan", or when the output is not valid JSON.
 *   String rejections are kept so existing callers see the same value type.
 */
vdb.getLlamaEmbedding = function (text) {
  return new Promise((resolve, reject) => {
    const llamaembed = config.llamacpp.replace("llama-cli", "llama-embedding");
    const embedmodel = config.llamacpp.replace("llama-cli", "snowflake-q8_0.gguf");
    const args = [
      "-m", embedmodel,
      "-e",
      "-p", String(text),
      "--embd-output-format", "array",
      "-ngl", "99",
    ];

    execFile(llamaembed, args, (error, stdout) => {
      if (error) {
        // Stop here: there is no usable output to parse after a failed run.
        reject(`error: ${error.message}`);
        return;
      }
      if (stdout.includes("nan")) {
        // Settle the promise instead of returning silently, which left callers waiting forever.
        reject("error: embedding output contains nan");
        return;
      }
      try {
        resolve({ data: JSON.parse(stdout) });
      } catch (e) {
        console.log(e);
        reject(`error: ${e.message}`);
      }
    });
  });
};
// Export `vdb` as this module's value (assigned to both `module.exports` and `exports`).
// Any error thrown by the assignment is deliberately ignored; presumably this guards
// against environments where `module` is not defined — TODO confirm.
try {
module.exports = exports = vdb;
} catch (e) {}