Issue #99: Use initial urls for the resume file

Nekmo committed on Feb 6, 2022 · commit b777b38 · 1 parent: d582150
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 1 deletion.
diff --git a/dirhunt/crawler.py b/dirhunt/crawler.py
@@ -65,10 +65,12 @@ def __init__(self, max_workers=None, interesting_extensions=None, interesting_fi
         self.limit = limit
         self.current_processed_count = 0
         self.to_file = to_file
+        self.initial_urls = []
 
     def add_init_urls(self, *urls):
         """Add urls to queue.
         """
+        self.initial_urls.extend(urls)
         for crawler_url in urls:
             if not isinstance(crawler_url, CrawlerUrl):
                 crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout)
@@ -203,6 +205,7 @@ def options(self):
             'proxies': self.proxies,
             'delay': self.delay,
             'limit': self.limit,
+            'initial_urls': self.initial_urls,
         }
 
     @property

diff --git a/dirhunt/management.py b/dirhunt/management.py
@@ -162,13 +162,13 @@ def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, in
                       not_follow_subdomains=not_follow_subdomains, exclude_sources=exclude_sources,
                       not_allow_redirects=not_allow_redirects, proxies=proxies, delay=delay, limit=limit,
                       to_file=to_file, user_agent=user_agent, cookies=cookies, headers=headers)
+    crawler.add_init_urls(*urls)
     if os.path.exists(crawler.get_resume_file()):
         click.echo('Resuming the previous program execution...')
         try:
             crawler.resume(crawler.get_resume_file())
         except IncompatibleVersionError as e:
             click.echo(e)
-    crawler.add_init_urls(*urls)
     while True:
         choice = catch_keyboard_interrupt_choices(crawler.print_results, ['abort', 'continue', 'results'], 'a') \
             (set(exclude_flags), set(include_flags))