From 841c02c1230f4098443226ee9e3714324f46f60b Mon Sep 17 00:00:00 2001 From: Andy Jackson Date: Tue, 26 Jan 2021 21:54:40 +0000 Subject: [PATCH] Default closest_limit to 100 instead of 10 (#606) At UKWA we're hitting cases where crawl variation means we have e.g. a lot of redirect records and in these cases the 10 record limit is too low. I can't see any way of configuring this value, so I'm proposing the default is raised. --- pywb/warcserver/index/indexsource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 67d966374..84a54800e 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -113,7 +113,7 @@ def init_from_config(cls, config): class RemoteIndexSource(BaseIndexSource): CDX_MATCH_RX = re.compile('^cdxj?\+(?P<url>https?\:.*)') - def __init__(self, api_url, replay_url, url_field='load_url', closest_limit=10): + def __init__(self, api_url, replay_url, url_field='load_url', closest_limit=100): self.api_url = api_url self.replay_url = replay_url self.url_field = url_field