Skip to content

Commit

Permalink
Bug fix during periodic crawling
Browse files (browse the repository at this point in the history)
  • Loading branch information
polterguy committed Oct 22, 2023
1 parent 712087f commit 35d724a
Showing 1 changed file with 46 additions and 29 deletions.
75 changes: 46 additions & 29 deletions backend/files/system/openai/magic.startup/crawling/crawl-models.hl
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,9 @@ tasks.create:ainiro-crawl-machine-learning-models
repeats:24.hours
.lambda

// Basic logging.
log.info:Task to periodically crawl models about to be executed

// Connecting to database.
data.connect:[generic|magic]

Expand All @@ -29,35 +32,49 @@ tasks.create:ainiro-crawl-machine-learning-models
// Looping through all types returned above.
for-each:x:@data.read/*

// Making sure exceptions never leave thread.
try

// Verifying user can create more snippets.
add:x:+
get-nodes:x:@.dp/#/*/type
try-signal:magic.ai.can-create-snippet

// Doing some logging.
log.info:Crawling website
url:x:@.dp/#/*/base_url
type:x:@.dp/#/*/type

// Invoking slot doing the heavy lifting.
unwrap:x:./*/signal/*
unwrap:x:./*/signal/*/.onafter/**
signal:magic.ai.crawl-site
delay:int:2000
threshold:int:150
summarize:bool:true
url:x:@.dp/#/*/base_url
type:x:@.dp/#/*/type
.onafter
signal:magic.ai.vectorise
// Verifying URL is not null and not empty.
if
and
not-null:x:@.dp/#/*/base_url
neq:x:@.dp/#/*/base_url
.:
.lambda

// Making sure exceptions never leave thread.
try

// Verifying user can create more snippets.
add:x:+
get-nodes:x:@.dp/#/*/type
try-signal:magic.ai.can-create-snippet

// Doing some logging.
log.info:Crawling website
url:x:@.dp/#/*/base_url
type:x:@.dp/#/*/type

// Invoking slot doing the heavy lifting.
unwrap:x:./*/signal/*
unwrap:x:./*/signal/*/.onafter/**
signal:magic.ai.crawl-site
delay:int:2000
threshold:int:150
max:int:1250
summarize:bool:true
url:x:@.dp/#/*/base_url
type:x:@.dp/#/*/type
feedback-channel:non-existing
.onafter
signal:magic.ai.vectorise
feedback-channel:non-existing
type:x:@.dp/#/*/type

.catch

.catch
// Logging.
log.error:Could not crawl site
error:x:@.arguments/*/message
site:x:@.dp/#/*/base_url

// Logging.
log.error:Could not crawl site
error:x:@.arguments/*/message
site:x:@.dp/#/*/base_url
// Basic logging.
log.info:All models scheduled for periodically crawling were crawled

0 comments on commit 35d724a

Please sign in to comment.