From 0d6259a49009b686a79b8a678810d95e4ed47aea Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Fri, 28 Mar 2025 12:17:55 +0300 Subject: [PATCH 01/13] Update cli_init.py --- service/cli_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/cli_init.py b/service/cli_init.py index 0431ae3..a2d5cf4 100644 --- a/service/cli_init.py +++ b/service/cli_init.py @@ -20,7 +20,7 @@ def welcome_menu(self): fig = Figlet(font=wm_font) print('\n') self.console.print(fig.renderText('DPULSE'), style=preview_style) - print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.2 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL) + print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.3 rolling] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL) print(Fore.MAGENTA + Style.BRIGHT + '[Visit our pages]\nGitHub repository: https://github.com/OSINT-TECHNOLOGIES\nPyPi page: https://pypi.org/project/dpulse/\nDocumentation: https://dpulse.readthedocs.io' + Style.RESET_ALL) def print_main_menu(self): From 679357b93d2a81fab519de7c611675d7a86e5d65 Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:27:09 +0300 Subject: [PATCH 02/13] Added new mention / bumped version --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ca0075..b5c48f7 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@
-
+
Organization: {{org}}
+1. General scan information
+2. WHOIS information
+3. Social media links
+4. Subdomains information
+5. DNS & SSL information
+6. Services & frameworks
+7. Basic pre-pentest information
+8. Dorking scan info
+9. PageSearch results
+10. API scan results
+Total subdomains: {{a_tsf}}
+Social media links: {{a_tsm}}
+Robots.txt: {{robots_txt_result}} +Sitemap.xml: {{sitemap_xml_result}} +Dorking: {{dorking_status}}+
Domain: {{sh_domain}} URL: {{full_url}}
+IP: {{ip_address}} +Registrar: {{registrar}} +Dates: {{creation_date}} → {{expiration_date}}+
Facebook:
{% for l in fb_links %}⇒ {{ l }}{% endfor %}+
Twitter/X:
{% for l in tw_links+xcom_links %}⇒ {{ l }}{% endfor %}+
Instagram:
{% for l in inst_links %}⇒ {{ l }}{% endfor %}+
Found subdomains:
{% for sd in subdomains %}⇒ {{ sd }}{% endfor %}+
IPs:
{% for sdip in subdomain_ip %}⇒ {{ sdip }}{% endfor %}+
NS: {{name_servers}} +MX: {{mx_records}} +SSL Issuer: {{issuer}} +NotBefore: {{notBefore}} +NotAfter: {{notAfter}}+
Web servers:
{% for ws in web_servers %}⇒ {{ ws }}{% endfor %}+
CMS:
{% for cm in cms %}⇒ {{ cm }}{% endfor %}+
Languages:
{% for pl in programming_languages %}⇒ {{ pl }}{% endfor %}+
Open ports:
{% for op in ports %}⇒ {{ op }}{% endfor %}+
Vulnerabilities:
{% for vuln in vulns %}⇒ {{ vuln }}{% endfor %}+
{{ add_dsi | safe }}
Subdomains: {{ps_s}} +Emails: {{ps_e}} +Documents: {{ps_f}} +…+
{{ virustotal_output }}
{{ securitytrails_output }}
{{ hudsonrock_output }}
Created by DPULSE (OSINT-TECHNOLOGIES)
+ + + + \ No newline at end of file diff --git a/service/pdf_report_templates/monospaced_report_template.html b/service/pdf_report_templates/monospaced_report_template.html new file mode 100644 index 0000000..e3521a4 --- /dev/null +++ b/service/pdf_report_templates/monospaced_report_template.html @@ -0,0 +1,174 @@ + + + + + + + + +Organization: {{org}}+
+1. General info +2. WHOIS +3. Social medias +4. Subdomains +5. DNS/SSL +6. Services +7. Pre-pentest +8. Dorking +9. PageSearch +10. APIs ++
+Subdomains: {{a_tsf}} +Social: {{a_tsm}} +Robots.txt: {{robots_txt_result}} +Sitemap.xml: {{sitemap_xml_result}} +Sitemap links: {{sitemap_links}} +Dorking: {{dorking_status}} +PageSearch: {{pagesearch_ui_mark}} +Snapshotting: {{snapshotting_ui_mark}} +Report time: {{ctime}} ++
+Domain: {{sh_domain}} +URL: {{full_url}} +IP: {{ip_address}} +Registrar: {{registrar}} +Created: {{creation_date}} +Expires: {{expiration_date}} +Emails: {{mails}} ++
+FACEBOOK: +{% for link in fb_links %}⇒ {{ link }}{% endfor %} +TWITTER/X: +{% for link in tw_links+xcom_links %}⇒ {{ link }}{% endfor %} +INSTAGRAM: +{% for link in inst_links %}⇒ {{ link }}{% endfor %} +TELEGRAM: +{% for link in tg_links %}⇒ {{ link }}{% endfor %} +TIKTOK: +{% for link in tt_links %}⇒ {{ link }}{% endfor %} +LINKEDIN: +{% for link in li_links %}⇒ {{ link }}{% endfor %} +VKONTAKTE: +{% for link in vk_links %}⇒ {{ link }}{% endfor %} +YOUTUBE: +{% for link in yt_links %}⇒ {{ link }}{% endfor %} +ODNOKLASSNIKI: +{% for link in ok_links %}⇒ {{ link }}{% endfor %} +WECHAT: +{% for link in wc_links %}⇒ {{ link }}{% endfor %} ++
+FOUND SUBDOMAINS: +{% for sd in subdomains %}⇒ {{ sd }}{% endfor %} +IPs: +{% for sdip in subdomain_ip %}⇒ {{ sdip }}{% endfor %} +Emails: +{% for smails in subdomain_mails %}⇒ {{ smails }}{% endfor %} ++
+NAME SERVERS: {{name_servers}} +MX RECORDS: {{mx_records}} +SSL ISSUER: {{issuer}} +SUBJECT: {{subject}} +NOT BEFORE: {{notBefore}} +NOT AFTER: {{notAfter}} +COMMON NAME: {{commonName}} +SERIAL: {{serialNumber}} ++
+WEB SERVERS: +{% for ws in web_servers %}⇒ {{ ws }}{% endfor %} +CMS: +{% for cm in cms %}⇒ {{ cm }}{% endfor %} +PROGRAMMING LANGUAGES: +{% for pl in programming_languages %}⇒ {{ pl }}{% endfor %} +WEB FRAMEWORKS: +{% for wf in web_frameworks %}⇒ {{ wf }}{% endfor %} +ANALYTICS: +{% for analytic in analytics %}⇒ {{ analytic }}{% endfor %} +JS FRAMEWORKS: +{% for jsf in javascript_frameworks %}⇒ {{ jsf }}{% endfor %} +TAGS: +{% for tag in tags %}⇒ {{ tag }}{% endfor %} +CPE: +{% for cpe in cpes %}⇒ {{ cpe }}{% endfor %} ++
+OPEN PORTS: +{% for op in ports %}⇒ {{ op }}{% endfor %} +HOSTNAMES: +{% for hn in hostnames %}⇒ {{ hn }}{% endfor %} +POTENTIAL VULNERABILITIES: +{% for vuln in vulns %}⇒ {{ vuln }}{% endfor %} ++
{{ add_dsi | safe }}+
+SUBDOMAINS FOUND: {{ps_s}} +EMAILS FOUND: {{ps_e}} +DOCUMENTS: {{ps_f}} +COOKIES: {{ps_c}} +API KEYS: {{ps_a}} +WEB ELEMENTS: {{ps_w}} +PASSWORDS: {{ps_p}} ++
{{ virustotal_output }}+
{{ securitytrails_output }}+
{{ hudsonrock_output }}+
+Created by DPULSE (OSINT-TECHNOLOGIES) +GitHub: https://github.com/OSINT-TECHNOLOGIES +PyPI: https://pypi.org/project/dpulse/ ++ + + \ No newline at end of file diff --git a/service/pdf_report_templates/paragraph_report_template.html b/service/pdf_report_templates/paragraph_report_template.html new file mode 100644 index 0000000..cf966ab --- /dev/null +++ b/service/pdf_report_templates/paragraph_report_template.html @@ -0,0 +1,154 @@ + + + + + + + + +
1. General scan information
+2. WHOIS information
+3. Social media links
+4. Subdomains information
+5. DNS & SSL information
+6. Services & frameworks
+7. Basic pre-pentest information
+8. Dorking scan info
+9. PageSearch results
+10. API scan results
+Total subdomains: {{a_tsf}}
+Total social media links: {{a_tsm}}
+Status of robots.txt: {{robots_txt_result}}
+Status of sitemap.xml: {{sitemap_xml_result}}
+Status of sitemap links: {{sitemap_links}}
+Google Dorking: {{dorking_status}}
+PageSearch: {{pagesearch_ui_mark}}
+Snapshotting: {{snapshotting_ui_mark}}
+Report time: {{ctime}}
+Domain: {{sh_domain}}
+Full URL: {{full_url}}
+IP address: {{ip_address}}
+Registrar: {{registrar}}
+Creation date: {{creation_date}}
+Expiration date: {{expiration_date}}
+Organization name: {{org}}
+Contact e-mails: {{mails}}
+FACEBOOK:
TWITTER (+ X.com):
INSTAGRAM:
TELEGRAM:
TIKTOK:
LINKEDIN:
VKONTAKTE:
YOUTUBE:
ODNOKLASSNIKI:
WECHAT:
Found subdomains:
Subdomains IP addresses:
Subdomains e-mails:
(DNS) Name servers: {{name_servers}}
+(DNS) MX addresses: {{mx_records}}
+(SSL) Issuer: {{issuer}}
+(SSL) Subject: {{subject}}
+(SSL) Not before: {{notBefore}}
+(SSL) Not after: {{notAfter}}
+(SSL) Certificate name: {{commonName}}
+(SSL) Certificate serial number: {{serialNumber}}
+Web servers:
CMS:
Used programming languages:
Used web frameworks:
Analytics service:
Used JavaScript frameworks:
Tags:
Common Platform Enumeration:
Open ports:
Hostnames:
Potential vulnerabilities:
{{ add_dsi | safe }}
Amount of accessible subdomains: {{ps_s}}
+Amount of email addresses: {{ps_e}}
+Amount of found documents: {{ps_f}}
+Amount of found cookies: {{ps_c}}
+Amount of found API keys: {{ps_a}}
+Amount of WEB elements found: {{ps_w}}
+Amount of exposed passwords found: {{ps_p}}
+{{ virustotal_output }}
{{ securitytrails_output }}
{{ hudsonrock_output }}
Created by DPULSE (OSINT-TECHNOLOGIES)
+ + + + \ No newline at end of file From f0d4a9aba958e2b31fcc7a8f5a51181e991273df Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Fri, 4 Apr 2025 22:02:09 +0300 Subject: [PATCH 06/13] Added Wayback Machine snapshoting module --- snapshotting/archive_snapshotting.py | 65 ++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 snapshotting/archive_snapshotting.py diff --git a/snapshotting/archive_snapshotting.py b/snapshotting/archive_snapshotting.py new file mode 100644 index 0000000..e8d1ee0 --- /dev/null +++ b/snapshotting/archive_snapshotting.py @@ -0,0 +1,65 @@ +import requests +import os +import time +from colorama import Fore, Style + +CDX_API = "http://web.archive.org/cdx/search/cdx" +RETRIES = 3 +PAUSE_BETWEEN_REQUESTS = 2 # seconds + +def get_snapshots(url, from_date, to_date): + params = { + "url": url, + "from": from_date, + "to": to_date, + "output": "json", + "fl": "timestamp,original,mime", + "filter": "statuscode:200", + "collapse": "digest" + } + print(Fore.GREEN + f"Sending request to Wayback CDX API for {url}, period: {from_date} - {to_date}..." 
+ Style.RESET_ALL) + response = requests.get(CDX_API, params=params) + response.raise_for_status() + data = response.json() + return data[1:] + +def snapshot_enum(report_folder, timestamp, original_url, index, retries=RETRIES): + archive_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}" + for attempt in range(1, retries + 1): + try: + response = requests.get(archive_url, timeout=15) + response.raise_for_status() + + filename = f"{index}_{timestamp}.html" + filepath = os.path.join(report_folder, filename) + + with open(filepath, "w", encoding="utf-8") as f: + f.write(response.text) + + print(Fore.GREEN + f"[{index}] Downloaded: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{archive_url}" + Style.RESET_ALL) + return True + except Exception as e: + print(Fore.RED + f"[{index}] Attempt {attempt}/{retries} failed for {archive_url}. Retrying..." + Style.RESET_ALL) + time.sleep(2) + + print(Fore.RED + f"[{index}] Failed to download after {retries} attempts: {archive_url}" + Style.RESET_ALL) + return False + +def download_snapshot(short_domain, from_date, end_date, report_folder): + os.makedirs(report_folder, exist_ok=True) + snapshots = get_snapshots(short_domain, from_date, end_date) + print(Fore.GREEN + "Total snapshots found:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(snapshots)}" + Style.RESET_ALL) + html_snapshots = [ + s for s in snapshots + if len(s) >= 2 and ( + s[1].endswith(".html") or s[1].endswith("/") or s[1] == short_domain + ) + ] + print(Fore.GREEN + "HTML snapshots to download:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(html_snapshots)}\n" + Style.RESET_ALL) + if not html_snapshots: + print(Fore.RED + "No HTML snapshots available for download." 
+ Style.RESET_ALL) + return + for i, (timestamp, original_url, *_) in enumerate(html_snapshots): + snapshot_enum(report_folder, timestamp, original_url, i) + time.sleep(PAUSE_BETWEEN_REQUESTS) + print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL) \ No newline at end of file From c0f10d56d8ec8bf8d41c9b0980b078733fc1bd05 Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Fri, 4 Apr 2025 22:03:03 +0300 Subject: [PATCH 07/13] Added support for Wayback Machine snapshoting --- dpulse.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dpulse.py b/dpulse.py index 63a8353..3760100 100644 --- a/dpulse.py +++ b/dpulse.py @@ -53,7 +53,7 @@ cli = cli_init.Menu() cli.welcome_menu() -def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username): +def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username, from_date, end_date): import xlsx_report_creation as xlsx_rc import html_report_creation as html_rc from misc import time_processing @@ -61,9 +61,9 @@ def process_report(report_filetype, short_domain, url, case_comment, keywords_li try: start = time() if pagesearch_flag in ['y', 'si']: - data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username) + data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, 
snapshotting_flag, username, from_date, end_date) else: - data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username) + data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username, from_date, end_date) end = time() - start endtime_string = time_processing(end) @@ -164,7 +164,7 @@ def run(): else: print(Fore.RED + "\nInvalid API usage mode" + Style.RESET_ALL) break - snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/N (for None)] >> ") + snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/W(ayback Machine)/N (for None)] >> ") if pagesearch_flag.lower() == 'y' or pagesearch_flag.lower() == 'n': if pagesearch_flag.lower() == "n": pagesearch_ui_mark = 'No' @@ -196,11 +196,16 @@ def run(): break else: snapshotting_ui_mark = 'No' + from_date = end_date = 'N' if snapshotting_flag.lower() == 's': + from_date = end_date = 'N' snapshotting_ui_mark = "Yes, domain's main page snapshotting as a screenshot" elif snapshotting_flag.lower() == 'p': + from_date = end_date = 'N' snapshotting_ui_mark = "Yes, domain's main page snapshotting as a .HTML file" elif snapshotting_flag.lower() == 'w': # not supported at the moment + from_date = str(input('Enter start date (YYYYMMDD format): ')) + end_date = str(input('Enter end date (YYYYMMDD format): ')) snapshotting_ui_mark = "Yes, domain's main page snapshotting using Wayback Machine" cli_init.print_prescan_summary(short_domain, report_filetype.upper(), pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark) print(Fore.LIGHTMAGENTA_EX + "[BASIC SCAN START]\n" + Style.RESET_ALL) @@ -209,7 +214,7 @@ def run(): if 
report_filetype.lower() in ['html', 'xlsx']: process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, - pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username) + pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username, from_date, end_date) else: print(Fore.RED + "\nUnsupported PageSearch mode. Please choose between Y or N") From 02d03698548100a908a3cd00d92d349dbbb7362a Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Fri, 4 Apr 2025 22:03:39 +0300 Subject: [PATCH 08/13] Added support for Wayback Machine snapshoting --- datagather_modules/data_assembler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datagather_modules/data_assembler.py b/datagather_modules/data_assembler.py index 1822976..8d97c25 100644 --- a/datagather_modules/data_assembler.py +++ b/datagather_modules/data_assembler.py @@ -16,6 +16,7 @@ from screen_snapshotting import take_screenshot from config_processing import read_config from html_snapshotting import save_page_as_html +from archive_snapshotting import download_snapshot try: import requests @@ -72,7 +73,7 @@ def report_preprocessing(self, short_domain, report_file_type): os.makedirs(report_folder, exist_ok=True) return casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, files_ctime, report_ctime - def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username): + def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username, from_date, end_date): casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, 
sitemap_links_filepath, report_file_type, report_folder, ctime, report_ctime = self.report_preprocessing(short_domain, report_file_type) logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE') print(Fore.GREEN + "Started scanning domain" + Style.RESET_ALL) @@ -186,6 +187,8 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png') elif snapshotting_flag.lower() == 'p': save_page_as_html(url, report_folder + '//domain_html_copy.html') + elif snapshotting_flag.lower() == 'w': + download_snapshot(short_domain, from_date, end_date, report_folder) print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL) else: pass @@ -269,6 +272,8 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png') elif snapshotting_flag.lower() == 'p': save_page_as_html(url, report_folder + '//domain_html_copy.html') + elif snapshotting_flag.lower() == 'w': + download_snapshot(short_domain, from_date, end_date, report_folder) print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL) else: pass From f58429a012218188303962ff6d0e626181e53f2a Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Sat, 5 Apr 2025 01:12:06 +0300 Subject: [PATCH 09/13] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f7a00eb..9aeff91 100644 --- a/README.md +++ b/README.md @@ -79,8 +79,9 @@ DPULSE is a software solution for conducting OSINT research in relation to a cer - HudsonRock API (for querying a database with exposed computers which were compromised through global info-stealer campaigns) 5. 
***Web-pages snapshoting:*** extended functionality which allows to save web-pages copies in different forms: - - Screenshot snapshotting (saves domain's main page in form of screenshot) - - HTML snapshotting (saves domain's main page in form of HTML file) + - Screenshot snapshotting (saves target domain's page in form of screenshot) + - HTML snapshotting (saves target domain'spage in form of HTML file) + - Wayback Machine snapshotting (saves every version of target domain's page within a user-defined time period) Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about scan in local report storage database, which can be restored later. @@ -170,7 +171,7 @@ If you have problems with starting installer.sh, you should try to use `dos2unix # Tasks to complete before new release - [x] Add web pages snapshoting (with screenshots) - [x] Add web pages snapshoting (with web pages copying as HTML objects) -- [ ] Add web pages snapshoting (with Wayback Machine) +- [x] Add web pages snapshoting (with Wayback Machine) # DPULSE mentions in social medias From 60cbc31a8904cb6257f43d07a015dc4d998d5870 Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Sat, 5 Apr 2025 01:25:14 +0300 Subject: [PATCH 10/13] Added nested folder for wayback snapshots --- snapshotting/archive_snapshotting.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/snapshotting/archive_snapshotting.py b/snapshotting/archive_snapshotting.py index e8d1ee0..7a5c338 100644 --- a/snapshotting/archive_snapshotting.py +++ b/snapshotting/archive_snapshotting.py @@ -23,43 +23,39 @@ def get_snapshots(url, from_date, to_date): data = response.json() return data[1:] -def snapshot_enum(report_folder, timestamp, original_url, index, retries=RETRIES): +def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index, retries=RETRIES): archive_url = 
f"https://web.archive.org/web/{timestamp}id_/{original_url}" for attempt in range(1, retries + 1): try: response = requests.get(archive_url, timeout=15) response.raise_for_status() - filename = f"{index}_{timestamp}.html" - filepath = os.path.join(report_folder, filename) - + filepath = os.path.join(snapshot_storage_folder, filename) with open(filepath, "w", encoding="utf-8") as f: f.write(response.text) - print(Fore.GREEN + f"[{index}] Downloaded: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{archive_url}" + Style.RESET_ALL) return True except Exception as e: print(Fore.RED + f"[{index}] Attempt {attempt}/{retries} failed for {archive_url}. Retrying..." + Style.RESET_ALL) time.sleep(2) - print(Fore.RED + f"[{index}] Failed to download after {retries} attempts: {archive_url}" + Style.RESET_ALL) return False def download_snapshot(short_domain, from_date, end_date, report_folder): - os.makedirs(report_folder, exist_ok=True) + snapshot_storage_folder = report_folder + '//wayback_snapshots' + os.makedirs(snapshot_storage_folder, exist_ok=True) snapshots = get_snapshots(short_domain, from_date, end_date) print(Fore.GREEN + "Total snapshots found:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(snapshots)}" + Style.RESET_ALL) html_snapshots = [ s for s in snapshots if len(s) >= 2 and ( - s[1].endswith(".html") or s[1].endswith("/") or s[1] == short_domain - ) + s[1].endswith(".html") or s[1].endswith("/") or s[1] == short_domain) ] print(Fore.GREEN + "HTML snapshots to download:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(html_snapshots)}\n" + Style.RESET_ALL) if not html_snapshots: print(Fore.RED + "No HTML snapshots available for download." 
+ Style.RESET_ALL) return for i, (timestamp, original_url, *_) in enumerate(html_snapshots): - snapshot_enum(report_folder, timestamp, original_url, i) + snapshot_enum(snapshot_storage_folder, timestamp, original_url, i) time.sleep(PAUSE_BETWEEN_REQUESTS) - print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL) \ No newline at end of file + print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL) From de9c6bb550859fa558ca3450b717e54081aca43a Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Sat, 5 Apr 2025 01:54:56 +0300 Subject: [PATCH 11/13] Added config values for Wayback Machine snapshotting --- service/config_processing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/service/config_processing.py b/service/config_processing.py index 3ed9bea..49e0fa1 100644 --- a/service/config_processing.py +++ b/service/config_processing.py @@ -31,7 +31,7 @@ def create_config(): config['LOGGING'] = {'log_level': 'info'} config['CLI VISUAL'] = {'preview_color': 'red', 'font': 'slant'} config['DORKING'] = {'dorking_delay (secs)': '2', 'delay_step': '5'} - config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None'} + config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None', 'wayback_retries': '3', 'wayback_req_pause': '2'} config['USER-AGENTS'] = {} for i, agent in enumerate(basic_user_agents): config['USER-AGENTS'][f'agent_{i + 1}'] = agent @@ -57,6 +57,8 @@ def read_config(): proxies_file_path = config.get('PROXIES', 'proxies_file_path') installed_browser = config.get('SNAPSHOTTING', 'installed_browser') opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path') + wayback_retries_amount = config.get('SNAPSHOTTING', 'wayback_retries') + wayback_requests_pause = config.get('SNAPSHOTTING', 'wayback_req_pause') html_report_template = config.get('HTML_REPORTING', 'template') @@ -70,6 
+72,8 @@ def read_config(): 'proxies_file_path': proxies_file_path, 'installed_browser': installed_browser, 'opera_browser_path': opera_browser_path, + 'wayback_retries_amount': wayback_retries_amount, + 'wayback_requests_pause': wayback_requests_pause, 'template': html_report_template } From 5918d99006d1b177746bd78624c3791caa8bf5eb Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Sat, 5 Apr 2025 02:13:32 +0300 Subject: [PATCH 12/13] Added function for getting wayback snapshotting values from config file --- snapshotting/archive_snapshotting.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/snapshotting/archive_snapshotting.py b/snapshotting/archive_snapshotting.py index 7a5c338..03ee561 100644 --- a/snapshotting/archive_snapshotting.py +++ b/snapshotting/archive_snapshotting.py @@ -2,10 +2,17 @@ import os import time from colorama import Fore, Style +import sys +from config_processing import read_config +sys.path.append('service') CDX_API = "http://web.archive.org/cdx/search/cdx" -RETRIES = 3 -PAUSE_BETWEEN_REQUESTS = 2 # seconds + +def get_values_from_config(): + config_values = read_config() + retries = int(config_values['wayback_retries_amount']) + pause_between_requests = int(config_values['wayback_requests_pause']) + return retries, pause_between_requests def get_snapshots(url, from_date, to_date): params = { @@ -23,7 +30,8 @@ def get_snapshots(url, from_date, to_date): data = response.json() return data[1:] -def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index, retries=RETRIES): +def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index): + retries, _ = get_values_from_config() archive_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}" for attempt in range(1, retries + 1): try: @@ -42,6 +50,7 @@ def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index, retri return False def 
download_snapshot(short_domain, from_date, end_date, report_folder): + _, pause_between_requests = get_values_from_config() snapshot_storage_folder = report_folder + '//wayback_snapshots' os.makedirs(snapshot_storage_folder, exist_ok=True) snapshots = get_snapshots(short_domain, from_date, end_date) @@ -57,5 +66,5 @@ def download_snapshot(short_domain, from_date, end_date, report_folder): return for i, (timestamp, original_url, *_) in enumerate(html_snapshots): snapshot_enum(snapshot_storage_folder, timestamp, original_url, i) - time.sleep(PAUSE_BETWEEN_REQUESTS) + time.sleep(pause_between_requests) print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL) From 01667de53f1f520717bdbd4ae61df6d88e8ba24f Mon Sep 17 00:00:00 2001 From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com> Date: Tue, 8 Apr 2025 15:47:21 +0300 Subject: [PATCH 13/13] Update cli_init.py --- service/cli_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/cli_init.py b/service/cli_init.py index a2d5cf4..c0a48ff 100644 --- a/service/cli_init.py +++ b/service/cli_init.py @@ -20,7 +20,7 @@ def welcome_menu(self): fig = Figlet(font=wm_font) print('\n') self.console.print(fig.renderText('DPULSE'), style=preview_style) - print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.3 rolling] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL) + print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.3 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL) print(Fore.MAGENTA + Style.BRIGHT + '[Visit our pages]\nGitHub repository: https://github.com/OSINT-TECHNOLOGIES\nPyPi page: https://pypi.org/project/dpulse/\nDocumentation: https://dpulse.readthedocs.io' + Style.RESET_ALL) def print_main_menu(self):