Skip to content

Commit

Permalink
VSS based filtering in training snippets component
Browse files Browse the repository at this point in the history
  • Loading branch information
polterguy committed Nov 10, 2023
1 parent 1375595 commit a53b208
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 7 deletions.
132 changes: 132 additions & 0 deletions backend/files/system/magic/ml_training_snippets-vss.get.hl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@

// Read endpoint with paging that selects records from the ml_training_snippets table in the magic database using VSS (vector similarity search).
// The mandatory [filter] argument is sent to OpenAI's embeddings API, and the resulting embedding is handed to vss_search() against vss_ml_training_snippets.
// Results can optionally be restricted on [type], and are paged with [limit] (defaults to 10) and [offset].
// Each returned record also gets a [tokens] value, and its embedding is replaced by a boolean. Requires the root role.
// NOTE(review): the [.description] below still mentions filtering on id, created, pushed, uri, prompt and completion, but this file only reads [filter], [type], [limit] and [offset] - consider updating it.
.arguments
limit:long
offset:long
filter:string
type:string
.description:CRUD read endpoint with paging, and VSS filtering selecting records from your ml_training_snippets table in your magic database returning id, created, type, pushed, uri, prompt, completion filtering on id, created, type, pushed, uri, prompt, completion with authentication and authorisation for root roles
.type:crud-read

// Verifying caller belongs to the root role before doing anything else.
auth.ticket.verify:root

// Sanity checking invocation, [filter] must be supplied and satisfy the string validator's [min] of 1 and [max] of 1000.
validators.mandatory:x:@.arguments/*/filter
validators.string:x:@.arguments/*/filter
min:1
max:1000

// Applying defaults, making sure [limit] is 10 unless caller supplied another value.
validators.default:x:@.arguments
limit:int:10

// Building the Authorization header value from "Bearer " and the OpenAI API key found in configuration.
.token
set-value:x:@.token
strings.concat
.:"Bearer "
config.get:"magic:openai:key"

// Retrieving embedding for the [filter] argument from OpenAI, using the text-embedding-ada-002 model.
http.post:"https://api.openai.com/v1/embeddings"
headers
Authorization:x:@.token
Content-Type:application/json
payload
input:x:@.arguments/*/filter
model:text-embedding-ada-002
convert:true

// Sanity checking above invocation, any status code outside of the 200 to 299 range is treated as an error.
if
not
and
mte:x:@http.post
.:int:200
lt:x:@http.post
.:int:300
.lambda

// Oops, error - Logging error and returning OpenAI's HTTP status code and error message to caller.
lambda2hyper:x:@http.post
log.error:Something went wrong while invoking OpenAI
message:x:@http.post/*/content/*/error/*/message
error:x:@lambda2hyper
throw:x:@http.post/*/content/*/error/*/message
public:bool:true
status:x:@http.post

// Converting the embedding's float values from the first item in OpenAI's response to bytes, which is what we pass in as @embedding further down.
floatArray2bytes:x:@http.post/*/content/*/data/0/*/embedding/*

// Opening up our database connection.
data.connect:[generic|magic]

// Base SQL joining the VSS table with ml_training_snippets on rowid, conditions and paging are appended below.
.sql:@"
select id, created, type, pushed, uri, prompt, completion, filename, cached, meta, embedding as embedding_vss
from vss_ml_training_snippets as vss
inner join ml_training_snippets ts on ts.id = vss.rowid
where
vss_search(vss.embedding_vss, @embedding)"

// Further parametrising invocation if we should.
// [type] is appended as a condition and added as an @type SQL parameter to the [data.select] invocation below.
// [limit] and [offset] are concatenated directly into the SQL text.
// NOTE(review): [limit] and [offset] are declared as long in [.arguments] - confirm that conversion is enforced before this point, since they are not passed as SQL parameters.
if
exists:x:@.arguments/*/type
.lambda
set-value:x:@.sql
strings.concat
get-value:x:@.sql
.:" and type = @type"
unwrap:x:+/*/*
add:x:@if/./*/data.select
.
@type:x:@.arguments/*/type
if
exists:x:@.arguments/*/limit
.lambda
set-value:x:@.sql
strings.concat
get-value:x:@.sql
.:" limit "
get-value:x:@.arguments/*/limit
if
exists:x:@.arguments/*/offset
.lambda
set-value:x:@.sql
strings.concat
get-value:x:@.sql
.:" offset "
get-value:x:@.arguments/*/offset

// Executing SQL towards database, passing in the embedding bytes as @embedding.
// NOTE(review): the SQL text is written to the info log on every invocation - confirm this is intentional and not leftover debugging.
log.info:x:@.sql
data.select:x:@.sql
@embedding:x:@floatArray2bytes

// Post processing each record, replacing its embedding with a boolean value and adding its token count.
for-each:x:@data.select/*

// Changing embedding to true if the record has one, and false otherwise, to preserve bandwidth.
if
not-null:x:@.dp/#/*/embedding_vss
.lambda
set-value:x:@.dp/#/*/embedding_vss
.:bool:true
else
set-value:x:@.dp/#/*/embedding_vss
.:bool:false

// Adding token count for each snippet, tokenizing prompt and completion joined by two CR/LF sequences.
strings.concat
get-value:x:@.dp/#/*/prompt
.:"\r\n\r\n"
get-value:x:@.dp/#/*/completion
openai.tokenize:x:@strings.concat
unwrap:x:+/*/*
add:x:@.dp/#
.
tokens:x:@openai.tokenize

// Returning result of above read invocation to caller.
return-nodes:x:@data.select/*
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,58 @@ export class MachineLearningTrainingService {
/**
 * Returns all training snippets from backend matching filter condition.
 *
 * When `filterOnVss` is true and the filter carries a search text in
 * `ml_training_snippets.uri.like`, the VSS endpoint is invoked with that
 * text as its `filter` argument, forwarding only `type`, `limit` and
 * `offset`. In every other case the regular CRUD endpoint is invoked with
 * the filter unchanged.
 *
 * @param filter Query arguments as built for the CRUD endpoint.
 * @param filterOnVss Whether to search by vector similarity.
 * @returns Whatever `httpService.get` returns for the chosen endpoint.
 */
ml_training_snippets(filter?: any, filterOnVss: boolean = false) {

  // The search text is presumably stored with a trailing '%' wildcard by
  // the caller (TODO confirm); it is only stripped when actually present.
  const like: unknown = filter ? filter['ml_training_snippets.uri.like'] : undefined;
  let query = '';
  if (typeof like === 'string') {
    query = like.endsWith('%') ? like.slice(0, -1) : like;
  }

  // A similarity search needs something to search for, and the VSS endpoint
  // declares its filter argument as mandatory. Falling back to the regular
  // endpoint avoids both a TypeError here and a rejected request there.
  if (!filterOnVss || query.length === 0) {
    return this.httpService.get<any>(
      '/magic/system/magic/ml_training_snippets' +
      this.queryArgService.getQueryArgs(filter));
  }

  // Translating CRUD style arguments to the ones the VSS endpoint declares.
  const vssArgs: Record<string, unknown> = { filter: query };
  if (filter['ml_training_snippets.type.eq']) {
    vssArgs['type'] = filter['ml_training_snippets.type.eq'];
  }
  if (filter['limit']) {
    vssArgs['limit'] = filter['limit'];
  }
  if (filter['offset']) {
    vssArgs['offset'] = filter['offset'];
  }

  return this.httpService.get<any>(
    '/magic/system/magic/ml_training_snippets-vss' +
    this.queryArgService.getQueryArgs(vssArgs));
}

/**
 * Counts training snippets matching condition.
 *
 * When `filterOnVss` is true only the type condition of the filter is
 * forwarded, as a `type` argument; the search text is not part of the count.
 * Otherwise the filter is forwarded unchanged.
 *
 * NOTE(review): the count endpoint is the regular CRUD one - confirm that it
 * accepts `type` rather than `ml_training_snippets.type.eq`.
 *
 * @param filter Query arguments as built for the CRUD endpoint.
 * @param filterOnVss Whether the listing is searched by vector similarity.
 * @returns Whatever `httpService.get` returns for the count endpoint.
 */
ml_training_snippets_count(filter?: any, filterOnVss: boolean = false) {

  let args: any = filter;

  if (filterOnVss) {

    // Guarding against a missing filter instead of dereferencing it blindly.
    const type = filter ? filter['ml_training_snippets.type.eq'] : undefined;
    const vssArgs: Record<string, unknown> = {};
    if (type) {
      vssArgs['type'] = type;
    }
    args = vssArgs;
  }

  return this.httpService.get<Count>(
    '/magic/system/magic/ml_training_snippets-count' +
    this.queryArgService.getQueryArgs(args));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ <h3 class="mb-2 fw-bold">Machine Learning training data</h3>
[button2Disabled]="!type"
(buttonClick)="create()"
[(type)]="type"
checkBoxText="VSS"
[types]="types">
</app-searchbox>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export class MachineLearningTrainingDataComponent implements OnInit {
'embedding_vss',
'action',
];
filterOnVss: boolean;

constructor(
private dialog: MatDialog,
Expand Down Expand Up @@ -389,12 +390,15 @@ export class MachineLearningTrainingDataComponent implements OnInit {
this.getTrainingData(false);
}

filterList(event: { searchKey: string, type?: string }) {
filterList(event: { searchKey: string, type?: string, checked: boolean }) {

this.filterOnVss = event.checked;

const newFilter: any = {
limit: this.filter.limit,
offset: 0,
};

if (this.filter.order) {
newFilter.order = this.filter.order;
}
Expand Down Expand Up @@ -459,7 +463,7 @@ export class MachineLearningTrainingDataComponent implements OnInit {

private getTrainingData(count: boolean = true) {

this.machineLearningTrainingService.ml_training_snippets(this.filter).subscribe({
this.machineLearningTrainingService.ml_training_snippets(this.filter, this.filterOnVss).subscribe({
next: (result: any[]) => {

this.dataSource = result || [];
Expand All @@ -476,7 +480,7 @@ export class MachineLearningTrainingDataComponent implements OnInit {
}
}

this.machineLearningTrainingService.ml_training_snippets_count(countFilter).subscribe({
this.machineLearningTrainingService.ml_training_snippets_count(countFilter, this.filterOnVss).subscribe({
next: (result: any) => {

this.count = result.count;
Expand Down

0 comments on commit a53b208

Please sign in to comment.