Skip to content

Commit

Permalink
Issue #99: Use initial urls for the resume file
Browse files: browse the repository at this point in the history
  • Loading branch information
Nekmo committed Feb 6, 2022
1 parent d582150 commit b777b38
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
3 changes: 3 additions & 0 deletions dirhunt/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,12 @@ def __init__(self, max_workers=None, interesting_extensions=None, interesting_fi
self.limit = limit
self.current_processed_count = 0
self.to_file = to_file
self.initial_urls = []

def add_init_urls(self, *urls):
"""Add urls to queue.
"""
self.initial_urls.extend(urls)
for crawler_url in urls:
if not isinstance(crawler_url, CrawlerUrl):
crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout)
Expand Down Expand Up @@ -203,6 +205,7 @@ def options(self):
'proxies': self.proxies,
'delay': self.delay,
'limit': self.limit,
'initial_urls': self.initial_urls,
}

@property
Expand Down
2 changes: 1 addition & 1 deletion dirhunt/management.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,13 @@ def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, in
not_follow_subdomains=not_follow_subdomains, exclude_sources=exclude_sources,
not_allow_redirects=not_allow_redirects, proxies=proxies, delay=delay, limit=limit,
to_file=to_file, user_agent=user_agent, cookies=cookies, headers=headers)
crawler.add_init_urls(*urls)
if os.path.exists(crawler.get_resume_file()):
click.echo('Resuming the previous program execution...')
try:
crawler.resume(crawler.get_resume_file())
except IncompatibleVersionError as e:
click.echo(e)
crawler.add_init_urls(*urls)
while True:
choice = catch_keyboard_interrupt_choices(crawler.print_results, ['abort', 'continue', 'results'], 'a') \
(set(exclude_flags), set(include_flags))
Expand Down

0 comments on commit b777b38

Please sign in to comment.