diff --git a/dirhunt/crawler.py b/dirhunt/crawler.py index 5fc9942..3ef12cb 100644 --- a/dirhunt/crawler.py +++ b/dirhunt/crawler.py @@ -65,10 +65,12 @@ def __init__(self, max_workers=None, interesting_extensions=None, interesting_fi self.limit = limit self.current_processed_count = 0 self.to_file = to_file + self.initial_urls = [] def add_init_urls(self, *urls): """Add urls to queue. """ + self.initial_urls.extend(urls) for crawler_url in urls: if not isinstance(crawler_url, CrawlerUrl): crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout) @@ -203,6 +205,7 @@ def options(self): 'proxies': self.proxies, 'delay': self.delay, 'limit': self.limit, + 'initial_urls': self.initial_urls, } @property diff --git a/dirhunt/management.py b/dirhunt/management.py index 3ba5911..ed4f69c 100644 --- a/dirhunt/management.py +++ b/dirhunt/management.py @@ -162,13 +162,13 @@ def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, in not_follow_subdomains=not_follow_subdomains, exclude_sources=exclude_sources, not_allow_redirects=not_allow_redirects, proxies=proxies, delay=delay, limit=limit, to_file=to_file, user_agent=user_agent, cookies=cookies, headers=headers) + crawler.add_init_urls(*urls) if os.path.exists(crawler.get_resume_file()): click.echo('Resuming the previous program execution...') try: crawler.resume(crawler.get_resume_file()) except IncompatibleVersionError as e: click.echo(e) - crawler.add_init_urls(*urls) while True: choice = catch_keyboard_interrupt_choices(crawler.print_results, ['abort', 'continue', 'results'], 'a') \ (set(exclude_flags), set(include_flags))