Skip to content

Commit

Permalink
add blocklist.txt to permanently skip resources
Browse files Browse the repository at this point in the history
  • Loading branch information
johnnychen94 committed Sep 6, 2020
1 parent b76603c commit 6e5fd93
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
5 changes: 4 additions & 1 deletion examples/gen_static_full.example.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ parameters = Dict(
# whether to show the progress bar
:show_progress => true,

# for how long (hours) you want to skip resources in `/failed_resources.txt` until the next try
# This script generates a `failed_resources.txt` that records files that failed to download. Some
# of these have already disappeared from the network and are no longer available. By default, items
# in this file are skipped for the next 24 hours. You can configure `skip_duration` to make this
# window longer. Alternatively, you can manually create a `blocklist.txt` to permanently skip them.
:skip_duration => 24
)

Expand Down
7 changes: 7 additions & 0 deletions src/mirror_tarball.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,25 @@ function mirror_tarball(
uuid = registry.uuid
name = registry.name
failed_logfile = joinpath(static_dir, "failed_resources.txt")
blocklist_file = joinpath(static_dir, "blocklist.txt")
upstream_str = join(upstreams, ", ")

# Some resources vanish and can never be downloaded, so we skip them for `skip_duration` hours after the last failed try
last_try_time = query_last_try_datetime(failed_logfile)
skipped = now() - last_try_time < Hour(skip_duration)
skipped_records = skipped ? read_records(failed_logfile) : Set()
blocked_resourced = read_records(blocklist_file)
function _download(resource, tarball; throw_warnings=true)
if resource in skipped_records
@info "fetching resource has failed in the last $skip_duration hours, skip it" resource date=now()
return false
end

if resource in blocked_resourced
@info "resource is listed in blocklist, skip it" resource date=now()
return false
end

try
show_progress && @info "downloading resource..." resource=resource tarball=tarball date=now()
rst = download_and_verify(upstreams, resource, tarball; http_parameters=http_parameters, throw_warnings=throw_warnings)
Expand Down

0 comments on commit 6e5fd93

Please sign in to comment.