-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexample.dart
89 lines (80 loc) · 3.09 KB
/
example.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import 'dart:convert';
import 'dart:io';
import 'package:http/http.dart' as http;
import 'package:pgvector/pgvector.dart';
import 'package:postgres/postgres.dart';
/// Requests an embedding for every string in [input] from the
/// `nomic-embed-text` model exposed at `localhost:11434/api/embed`.
///
/// Each input is prefixed with `"<taskType>: "` before it is sent, because the
/// model expects a task prefix (the callers in this file pass
/// `search_document` and `search_query`).
///
/// Returns one vector per input, in the order given by the service's
/// `embeddings` array.
///
/// Throws an [HttpException] when the service answers with a non-2xx status,
/// and a [FormatException] when the response body carries no `embeddings`
/// list.
Future<List<List<double>>> embed(List<String> input, String taskType) async {
  // nomic-embed-text uses a task prefix
  // https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
  final prefixed = [for (final text in input) '$taskType: $text'];

  final url = Uri.http('localhost:11434', 'api/embed');
  final headers = {'Content-Type': 'application/json'};
  final payload = {'input': prefixed, 'model': 'nomic-embed-text'};
  final response =
      await http.post(url, body: jsonEncode(payload), headers: headers);

  // Fail loudly on an error response instead of tripping over a missing
  // `embeddings` key further down.
  if (response.statusCode < 200 || response.statusCode >= 300) {
    throw HttpException(
        'Embedding request failed with status ${response.statusCode}: '
        '${response.body}',
        uri: url);
  }

  final decoded = jsonDecode(response.body);
  final embeddings = decoded is Map ? decoded['embeddings'] : null;
  if (embeddings is! List) {
    throw FormatException(
        'Embedding response has no "embeddings" list', response.body);
  }

  // jsonDecode yields `int` for integer-valued JSON numbers (e.g. `0`), so
  // convert through `num` rather than assuming every component is a double.
  return [
    for (final vector in embeddings)
      [for (final value in vector as List) (value as num).toDouble()]
  ];
}
/// Runs a hybrid-search demo against the local `pgvector_example` database.
///
/// The script recreates a `documents` table, stores three sample sentences
/// together with their embeddings, then ranks them for the query
/// `growling bear` by combining a vector-distance ranking and a full-text
/// ranking with reciprocal rank fusion (RRF). One line per result is printed.
///
/// The database connection is always closed, even when a statement fails.
Future<void> main() async {
  // Connects as the current OS user (the `USER` environment variable) and
  // registers the pgvector encoder so `Vector` values can be bound as
  // parameters.
  final connection = await Connection.open(
      Endpoint(
          host: 'localhost',
          port: 5432,
          database: 'pgvector_example',
          username: Platform.environment['USER']),
      settings: ConnectionSettings(
          sslMode: SslMode.disable,
          typeRegistry: TypeRegistry(encoders: [pgvectorEncoder])));

  try {
    // Start from a clean schema on every run.
    await connection.execute('CREATE EXTENSION IF NOT EXISTS vector');
    await connection.execute('DROP TABLE IF EXISTS documents');
    // The embedding column is fixed at 768 dimensions, so the vectors
    // returned by `embed` must have that length.
    await connection.execute(
        'CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(768))');
    // GIN index over the English tsvector, matching the expression used by
    // the keyword half of the query below.
    await connection.execute(
        "CREATE INDEX ON documents USING GIN (to_tsvector('english', content))");

    final input = [
      'The dog is barking',
      'The cat is purring',
      'The bear is growling'
    ];
    final embeddings = await embed(input, 'search_document');
    for (var i = 0; i < input.length; i++) {
      await connection.execute(
          Sql.named(
              'INSERT INTO documents (content, embedding) VALUES (@content, @embedding)'),
          parameters: {'content': input[i], 'embedding': Vector(embeddings[i])});
    }

    // Two candidate lists of at most 20 rows each:
    //   * semantic_search ranks rows by the `<=>` distance to the query
    //     embedding (cosine distance in pgvector);
    //   * keyword_search ranks rows matching the full-text query by
    //     ts_rank_cd.
    // They are merged with a FULL OUTER JOIN so a row found by only one
    // method still scores; each side contributes 1 / (k + rank), or 0 when
    // the row is absent from that list.
    final sql = """
WITH semantic_search AS (
SELECT id, RANK () OVER (ORDER BY embedding <=> @embedding) AS rank
FROM documents
ORDER BY embedding <=> @embedding
LIMIT 20
),
keyword_search AS (
SELECT id, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC)
FROM documents, plainto_tsquery('english', @query) query
WHERE to_tsvector('english', content) @@ query
ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC
LIMIT 20
)
SELECT
COALESCE(semantic_search.id, keyword_search.id) AS id,
COALESCE(1.0 / (@k + semantic_search.rank), 0.0) +
COALESCE(1.0 / (@k + keyword_search.rank), 0.0) AS score
FROM semantic_search
FULL OUTER JOIN keyword_search ON semantic_search.id = keyword_search.id
ORDER BY score DESC
LIMIT 5
""";

    final query = 'growling bear';
    final queryEmbedding = (await embed([query], 'search_query'))[0];
    // Smoothing constant added to each rank in the RRF formula.
    final k = 60;
    final result = await connection.execute(Sql.named(sql), parameters: {
      'query': query,
      'embedding': Vector(queryEmbedding),
      'k': k
    });
    for (final row in result) {
      print('document: ${row[0]}, RRF score: ${row[1]}');
    }
  } finally {
    // Release the connection even if any statement above throws.
    await connection.close();
  }
}