Skip to content

Stabilized v1.2.3 #101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 8, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@
</p>

<p align="center">
<img alt="Static Badge" src="https://img.shields.io/badge/v1.2.1-CURRENT%20STABLE%20VERSION?style=for-the-badge&label=CURRENT%20STABLE%20VERSION&color=red"> <img alt="Static Badge" src="https://img.shields.io/badge/v1.2.2-red?style=for-the-badge&logo=f&label=CURRENT%20ROLLING%20VERSION">
<img alt="Static Badge" src="https://img.shields.io/badge/v1.2.2-CURRENT%20STABLE%20VERSION?style=for-the-badge&label=CURRENT%20STABLE%20VERSION&color=red"> <img alt="Static Badge" src="https://img.shields.io/badge/v1.2.3-red?style=for-the-badge&logo=f&label=CURRENT%20ROLLING%20VERSION">
<img alt="Static Badge" src="https://img.shields.io/badge/DOMAIN_OSINT-CATEGORY?style=for-the-badge&label=TOOL%20CATEGORY&color=red"> <img alt="Static Badge" src="https://img.shields.io/badge/CLI-CATEGORY?style=for-the-badge&label=interface%20type&color=red">
</p>

@@ -78,6 +78,11 @@ DPULSE is a software solution for conducting OSINT research in relation to a cer
- SecurityTrails API (deep subdomains and DNS enumeration)
- HudsonRock API (for querying a database with exposed computers which were compromised through global info-stealer campaigns)

5. ***Web-pages snapshotting:*** extended functionality which allows saving copies of web pages in different forms:
- Screenshot snapshotting (saves the target domain's page as a screenshot)
- HTML snapshotting (saves the target domain's page as an HTML file)
- Wayback Machine snapshotting (saves every version of the target domain's page within a user-defined time period)

Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about the scan in a local report storage database, from which it can be restored later.

# How to install and run DPULSE
@@ -166,7 +171,7 @@ If you have problems with starting installer.sh, you should try to use `dos2unix
# Tasks to complete before new release
- [x] Add web pages snapshotting (with screenshots)
- [x] Add web pages snapshotting (with web pages copying as HTML objects)
- [ ] Add web pages snapshotting (with Wayback Machine)
- [x] Add web pages snapshotting (with Wayback Machine)

# DPULSE mentions in social media

@@ -176,6 +181,8 @@ If you have problems with starting installer.sh, you should try to use `dos2unix

### [The very first mention from cybercrime intelligence company (HudsonRock)](https://www.linkedin.com/feed/update/urn:li:share:7294336938495385600/)

### [The very first mention on cybersecurity educational website (Ethical Hackers Academy)](https://ethicalhacksacademy.com/blogs/cyber-security-tools/dpulse)

## X.com mentions:

### [by @DarkWebInformer](https://x.com/DarkWebInformer/status/1787583156775759915?t=Ak1W9ddUPpDvLAkVyQG8fQ&s=19)
7 changes: 6 additions & 1 deletion datagather_modules/data_assembler.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@
from screen_snapshotting import take_screenshot
from config_processing import read_config
from html_snapshotting import save_page_as_html
from archive_snapshotting import download_snapshot

try:
import requests
@@ -72,7 +73,7 @@ def report_preprocessing(self, short_domain, report_file_type):
os.makedirs(report_folder, exist_ok=True)
return casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, files_ctime, report_ctime

def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username):
def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username, from_date, end_date):
casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, ctime, report_ctime = self.report_preprocessing(short_domain, report_file_type)
logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE')
print(Fore.GREEN + "Started scanning domain" + Style.RESET_ALL)
@@ -186,6 +187,8 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png')
elif snapshotting_flag.lower() == 'p':
save_page_as_html(url, report_folder + '//domain_html_copy.html')
elif snapshotting_flag.lower() == 'w':
download_snapshot(short_domain, from_date, end_date, report_folder)
print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL)
else:
pass
@@ -269,6 +272,8 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png')
elif snapshotting_flag.lower() == 'p':
save_page_as_html(url, report_folder + '//domain_html_copy.html')
elif snapshotting_flag.lower() == 'w':
download_snapshot(short_domain, from_date, end_date, report_folder)
print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL)
else:
pass
15 changes: 10 additions & 5 deletions dpulse.py
Original file line number Diff line number Diff line change
@@ -53,17 +53,17 @@
cli = cli_init.Menu()
cli.welcome_menu()

def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username):
def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username, from_date, end_date):
import xlsx_report_creation as xlsx_rc
import html_report_creation as html_rc
from misc import time_processing

try:
start = time()
if pagesearch_flag in ['y', 'si']:
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username)
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username, from_date, end_date)
else:
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username)
data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username, from_date, end_date)
end = time() - start
endtime_string = time_processing(end)

@@ -164,7 +164,7 @@ def run():
else:
print(Fore.RED + "\nInvalid API usage mode" + Style.RESET_ALL)
break
snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/N (for None)] >> ")
snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/W(ayback Machine)/N (for None)] >> ")
if pagesearch_flag.lower() == 'y' or pagesearch_flag.lower() == 'n':
if pagesearch_flag.lower() == "n":
pagesearch_ui_mark = 'No'
@@ -196,11 +196,16 @@ def run():
break
else:
snapshotting_ui_mark = 'No'
from_date = end_date = 'N'
if snapshotting_flag.lower() == 's':
from_date = end_date = 'N'
snapshotting_ui_mark = "Yes, domain's main page snapshotting as a screenshot"
elif snapshotting_flag.lower() == 'p':
from_date = end_date = 'N'
snapshotting_ui_mark = "Yes, domain's main page snapshotting as a .HTML file"
elif snapshotting_flag.lower() == 'w': # not supported at the moment
from_date = str(input('Enter start date (YYYYMMDD format): '))
end_date = str(input('Enter end date (YYYYMMDD format): '))
snapshotting_ui_mark = "Yes, domain's main page snapshotting using Wayback Machine"
cli_init.print_prescan_summary(short_domain, report_filetype.upper(), pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark)
print(Fore.LIGHTMAGENTA_EX + "[BASIC SCAN START]\n" + Style.RESET_ALL)
@@ -209,7 +214,7 @@ def run():
if report_filetype.lower() in ['html', 'xlsx']:
process_report(report_filetype, short_domain, url, case_comment,
keywords_list, keywords_flag, dorking_flag, used_api_flag,
pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username)
pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username, from_date, end_date)
else:
print(Fore.RED + "\nUnsupported PageSearch mode. Please choose between Y or N")

2 changes: 1 addition & 1 deletion service/cli_init.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ def welcome_menu(self):
fig = Figlet(font=wm_font)
print('\n')
self.console.print(fig.renderText('DPULSE'), style=preview_style)
print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.2 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL)
print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.3 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL)
print(Fore.MAGENTA + Style.BRIGHT + '[Visit our pages]\nGitHub repository: https://github.com/OSINT-TECHNOLOGIES\nPyPi page: https://pypi.org/project/dpulse/\nDocumentation: https://dpulse.readthedocs.io' + Style.RESET_ALL)

def print_main_menu(self):
11 changes: 9 additions & 2 deletions service/config_processing.py
Original file line number Diff line number Diff line change
@@ -27,10 +27,11 @@ def create_config():
]

config = configparser.ConfigParser()
config['HTML_REPORTING'] = {'template': 'default'}
config['LOGGING'] = {'log_level': 'info'}
config['CLI VISUAL'] = {'preview_color': 'red', 'font': 'slant'}
config['DORKING'] = {'dorking_delay (secs)': '2', 'delay_step': '5'}
config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None'}
config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None', 'wayback_retries': '3', 'wayback_req_pause': '2'}
config['USER-AGENTS'] = {}
for i, agent in enumerate(basic_user_agents):
config['USER-AGENTS'][f'agent_{i + 1}'] = agent
@@ -56,6 +57,9 @@ def read_config():
proxies_file_path = config.get('PROXIES', 'proxies_file_path')
installed_browser = config.get('SNAPSHOTTING', 'installed_browser')
opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path')
wayback_retries_amount = config.get('SNAPSHOTTING', 'wayback_retries')
wayback_requests_pause = config.get('SNAPSHOTTING', 'wayback_req_pause')
html_report_template = config.get('HTML_REPORTING', 'template')


config_values = {
@@ -67,7 +71,10 @@ def read_config():
'user_agents': user_agents,
'proxies_file_path': proxies_file_path,
'installed_browser': installed_browser,
'opera_browser_path': opera_browser_path
'opera_browser_path': opera_browser_path,
'wayback_retries_amount': wayback_retries_amount,
'wayback_requests_pause': wayback_requests_pause,
'template': html_report_template
}

return config_values
120 changes: 120 additions & 0 deletions service/pdf_report_templates/compromise_report_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; line-height: 1.6; }
h3 { text-align: center; margin-top: 20px; }
pre { background: #f8f8f8; padding: 5px; border: 1px solid #ddd; }
.section { margin: 10px 40px; }
</style>
</head>
<body>

<h3>OPEN SOURCE RESEARCH REPORT</h3>
<p><b>Organization:</b> {{org}}</p>
<hr />

<h3>TABLE OF CONTENTS</h3>
<div class="section">
<p>1. General scan information</p>
<p>2. WHOIS information</p>
<p>3. Social medias links</p>
<p>4. Subdomains information</p>
<p>5. DNS & SSL information</p>
<p>6. Services & frameworks</p>
<p>7. Basic pre-pentest information</p>
<p>8. Dorking scan info</p>
<p>9. PageSearch results</p>
<p>10. API scan results</p>
</div>
<hr />

<h3>GENERAL SCAN INFO</h3>
<div class="section">
<p><b>Total subdomains:</b> {{a_tsf}}</p>
<p><b>Social media links:</b> {{a_tsm}}</p>
<pre>Robots.txt: {{robots_txt_result}}
Sitemap.xml: {{sitemap_xml_result}}
Dorking: {{dorking_status}}</pre>
</div>
<hr />

<h3>WHOIS INFORMATION</h3>
<div class="section">
<p><b>Domain:</b> {{sh_domain}} <b>URL:</b> {{full_url}}</p>
<pre>IP: {{ip_address}}
Registrar: {{registrar}}
Dates: {{creation_date}} → {{expiration_date}}</pre>
</div>
<hr />

<h3>SOCIAL MEDIAS</h3>
<div class="section">
<p><b>Facebook:</b></p><pre>{% for l in fb_links %}⇒ {{ l }}{% endfor %}</pre>
<p><b>Twitter/X:</b></p><pre>{% for l in tw_links+xcom_links %}⇒ {{ l }}{% endfor %}</pre>
<p><b>Instagram:</b></p><pre>{% for l in inst_links %}⇒ {{ l }}{% endfor %}</pre>
</div>
<hr />

<h3>SUBDOMAINS</h3>
<div class="section">
<p><b>Found subdomains:</b></p><pre>{% for sd in subdomains %}⇒ {{ sd }}{% endfor %}</pre>
<p><b>IPs:</b></p><pre>{% for sdip in subdomain_ip %}⇒ {{ sdip }}{% endfor %}</pre>
</div>
<hr />

<h3>DNS/SSL</h3>
<div class="section">
<pre>NS: {{name_servers}}
MX: {{mx_records}}
SSL Issuer: {{issuer}}
NotBefore: {{notBefore}}
NotAfter: {{notAfter}}</pre>
</div>
<hr />

<h3>SERVICES</h3>
<div class="section">
<p><b>Web servers:</b></p><pre>{% for ws in web_servers %}⇒ {{ ws }}{% endfor %}</pre>
<p><b>CMS:</b></p><pre>{% for cm in cms %}⇒ {{ cm }}{% endfor %}</pre>
<p><b>Languages:</b></p><pre>{% for pl in programming_languages %}⇒ {{ pl }}{% endfor %}</pre>
</div>
<hr />

<h3>BASIC PRE-PENTEST</h3>
<div class="section">
<p><b>Open ports:</b></p><pre>{% for op in ports %}⇒ {{ op }}{% endfor %}</pre>
<p><b>Vulnerabilities:</b></p><pre>{% for vuln in vulns %}⇒ {{ vuln }}{% endfor %}</pre>
</div>
<hr />

<h3>DORKING SCAN</h3>
<div class="section"><pre>{{ add_dsi | safe }}</pre></div>
<hr />

<h3>PAGESEARCH</h3>
<div class="section">
<pre>Subdomains: {{ps_s}}
Emails: {{ps_e}}
Documents: {{ps_f}}
</pre>
</div>
<hr />

<h3>VIRUSTOTAL</h3>
<div class="section"><pre>{{ virustotal_output }}</pre></div>
<h3>SECURITYTRAILS</h3>
<div class="section"><pre>{{ securitytrails_output }}</pre></div>
<h3>HUDSONROCK</h3>
<div class="section"><pre>{{ hudsonrock_output }}</pre></div>
<hr />

<p style="text-align:center;">Created by DPULSE (OSINT-TECHNOLOGIES)</p>
<p style="text-align:center;">
<a href="https://github.com/OSINT-TECHNOLOGIES">GitHub</a> |
<a href="https://pypi.org/project/dpulse/">PyPI</a>
</p>

</body>
</html>
174 changes: 174 additions & 0 deletions service/pdf_report_templates/monospaced_report_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: monospace; font-size: 0.9em; }
pre { margin: 0; white-space: pre-wrap; }
h3 { font-family: sans-serif; text-align: center; margin-top: 20px; }
</style>
</head>
<body>

<h3>OPEN SOURCE RESEARCH REPORT</h3>
<pre>Organization: {{org}}</pre>
<hr />

<h3>TABLE OF CONTENTS</h3>
<pre>
1. General info
2. WHOIS
3. Social medias
4. Subdomains
5. DNS/SSL
6. Services
7. Pre-pentest
8. Dorking
9. PageSearch
10. APIs
</pre>
<hr />

<h3>GENERAL SCAN INFO</h3>
<pre>
Subdomains: {{a_tsf}}
Social: {{a_tsm}}
Robots.txt: {{robots_txt_result}}
Sitemap.xml: {{sitemap_xml_result}}
Sitemap links: {{sitemap_links}}
Dorking: {{dorking_status}}
PageSearch: {{pagesearch_ui_mark}}
Snapshotting: {{snapshotting_ui_mark}}
Report time: {{ctime}}
</pre>
<hr />

<h3>WHOIS INFORMATION</h3>
<pre>
Domain: {{sh_domain}}
URL: {{full_url}}
IP: {{ip_address}}
Registrar: {{registrar}}
Created: {{creation_date}}
Expires: {{expiration_date}}
Emails: {{mails}}
</pre>
<hr />

<h3>SOCIAL MEDIAS SEARCH RESULTS</h3>
<pre>
FACEBOOK:
{% for link in fb_links %}⇒ {{ link }}{% endfor %}
TWITTER/X:
{% for link in tw_links+xcom_links %}⇒ {{ link }}{% endfor %}
INSTAGRAM:
{% for link in inst_links %}⇒ {{ link }}{% endfor %}
TELEGRAM:
{% for link in tg_links %}⇒ {{ link }}{% endfor %}
TIKTOK:
{% for link in tt_links %}⇒ {{ link }}{% endfor %}
LINKEDIN:
{% for link in li_links %}⇒ {{ link }}{% endfor %}
VKONTAKTE:
{% for link in vk_links %}⇒ {{ link }}{% endfor %}
YOUTUBE:
{% for link in yt_links %}⇒ {{ link }}{% endfor %}
ODNOKLASSNIKI:
{% for link in ok_links %}⇒ {{ link }}{% endfor %}
WECHAT:
{% for link in wc_links %}⇒ {{ link }}{% endfor %}
</pre>
<hr />

<h3>SUBDOMAINS ANALYSIS RESULTS</h3>
<pre>
FOUND SUBDOMAINS:
{% for sd in subdomains %}⇒ {{ sd }}{% endfor %}
IPs:
{% for sdip in subdomain_ip %}⇒ {{ sdip }}{% endfor %}
Emails:
{% for smails in subdomain_mails %}⇒ {{ smails }}{% endfor %}
</pre>
<hr />

<h3>DNS & SSL INFORMATION</h3>
<pre>
NAME SERVERS: {{name_servers}}
MX RECORDS: {{mx_records}}
SSL ISSUER: {{issuer}}
SUBJECT: {{subject}}
NOT BEFORE: {{notBefore}}
NOT AFTER: {{notAfter}}
COMMON NAME: {{commonName}}
SERIAL: {{serialNumber}}
</pre>
<hr />

<h3>SERVICES & FRAMEWORKS INFORMATION</h3>
<pre>
WEB SERVERS:
{% for ws in web_servers %}⇒ {{ ws }}{% endfor %}
CMS:
{% for cm in cms %}⇒ {{ cm }}{% endfor %}
PROGRAMMING LANGUAGES:
{% for pl in programming_languages %}⇒ {{ pl }}{% endfor %}
WEB FRAMEWORKS:
{% for wf in web_frameworks %}⇒ {{ wf }}{% endfor %}
ANALYTICS:
{% for analytic in analytics %}⇒ {{ analytic }}{% endfor %}
JS FRAMEWORKS:
{% for jsf in javascript_frameworks %}⇒ {{ jsf }}{% endfor %}
TAGS:
{% for tag in tags %}⇒ {{ tag }}{% endfor %}
CPE:
{% for cpe in cpes %}⇒ {{ cpe }}{% endfor %}
</pre>
<hr />

<h3>BASIC PRE-PENTEST INFORMATION</h3>
<pre>
OPEN PORTS:
{% for op in ports %}⇒ {{ op }}{% endfor %}
HOSTNAMES:
{% for hn in hostnames %}⇒ {{ hn }}{% endfor %}
POTENTIAL VULNERABILITIES:
{% for vuln in vulns %}⇒ {{ vuln }}{% endfor %}
</pre>
<hr />

<h3>DORKING SCAN INFO</h3>
<pre>{{ add_dsi | safe }}</pre>
<hr />

<h3>PAGESEARCH RESULTS</h3>
<pre>
SUBDOMAINS FOUND: {{ps_s}}
EMAILS FOUND: {{ps_e}}
DOCUMENTS: {{ps_f}}
COOKIES: {{ps_c}}
API KEYS: {{ps_a}}
WEB ELEMENTS: {{ps_w}}
PASSWORDS: {{ps_p}}
</pre>
<hr />

<h3>VIRUSTOTAL API SCAN RESULTS</h3>
<pre>{{ virustotal_output }}</pre>
<hr />

<h3>SECURITYTRAILS API SCAN RESULTS</h3>
<pre>{{ securitytrails_output }}</pre>
<hr />

<h3>HUDSONROCK API SCAN RESULTS</h3>
<pre>{{ hudsonrock_output }}</pre>
<hr />

<pre>
Created by DPULSE (OSINT-TECHNOLOGIES)
GitHub: https://github.com/OSINT-TECHNOLOGIES
PyPI: https://pypi.org/project/dpulse/
</pre>

</body>
</html>
154 changes: 154 additions & 0 deletions service/pdf_report_templates/paragraph_report_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; line-height: 1.6; color: #333; }
h1, h2, h3 { color: #111; }
h3 { text-align: center; margin-top: 20px; }
ul { list-style: none; padding: 0; }
li { margin: 4px 0; }
.section { margin: 10px 40px; }
</style>
</head>
<body>

<h1 style="text-align:center;">Open Source Research Report</h1>
<h2 style="text-align:center;">{{org}}</h2>
<hr />

<h3>Table of Contents</h3>
<div class="section">
<p>1. General scan information</p>
<p>2. WHOIS information</p>
<p>3. Social medias links</p>
<p>4. Subdomains information</p>
<p>5. DNS & SSL information</p>
<p>6. Services & frameworks</p>
<p>7. Basic pre-pentest information</p>
<p>8. Dorking scan info</p>
<p>9. PageSearch results</p>
<p>10. API scan results</p>
</div>
<hr />

<h3>GENERAL SCAN INFO</h3>
<div class="section">
<p><b>Total subdomains:</b> {{a_tsf}}</p>
<p><b>Total social media links:</b> {{a_tsm}}</p>
<p><b>Status of robots.txt:</b> {{robots_txt_result}}</p>
<p><b>Status of sitemap.xml:</b> {{sitemap_xml_result}}</p>
<p><b>Status of sitemap links:</b> {{sitemap_links}}</p>
<p><b>Google Dorking:</b> {{dorking_status}}</p>
<p><b>PageSearch:</b> {{pagesearch_ui_mark}}</p>
<p><b>Snapshotting:</b> {{snapshotting_ui_mark}}</p>
<p><b>Report time:</b> {{ctime}}</p>
</div>
<hr />

<h3>WHOIS INFORMATION</h3>
<div class="section">
<p><b>Domain:</b> {{sh_domain}}</p>
<p><b>Full URL:</b> {{full_url}}</p>
<p><b>IP address:</b> {{ip_address}}</p>
<p><b>Registrar:</b> {{registrar}}</p>
<p><b>Creation date:</b> {{creation_date}}</p>
<p><b>Expiration date:</b> {{expiration_date}}</p>
<p><b>Organization name:</b> {{org}}</p>
<p><b>Contact e-mails:</b> {{mails}}</p>
</div>
<hr />

<h3>SOCIAL MEDIAS SEARCH RESULTS</h3>
<div class="section">
<p><b>FACEBOOK:</b></p><ul>{% for link in fb_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>TWITTER (+ X.com):</b></p><ul>{% for link in tw_links %}<li>⇒ {{ link }}</li>{% endfor %}{% for link in xcom_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>INSTAGRAM:</b></p><ul>{% for link in inst_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>TELEGRAM:</b></p><ul>{% for link in tg_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>TIKTOK:</b></p><ul>{% for link in tt_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>LINKEDIN:</b></p><ul>{% for link in li_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>VKONTAKTE:</b></p><ul>{% for link in vk_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>YOUTUBE:</b></p><ul>{% for link in yt_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>ODNOKLASSNIKI:</b></p><ul>{% for link in ok_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
<p><b>WECHAT:</b></p><ul>{% for link in wc_links %}<li>⇒ {{ link }}</li>{% endfor %}</ul>
</div>
<hr />

<h3>SUBDOMAINS ANALYSIS RESULTS</h3>
<div class="section">
<p><b>Found subdomains:</b></p><ul>{% for sd in subdomains %}<li>⇒ {{ sd }}</li>{% endfor %}</ul>
<p><b>Subdomains IP addresses:</b></p><ul>{% for sdip in subdomain_ip %}<li>⇒ {{ sdip }}</li>{% endfor %}</ul>
<p><b>Subdomains e-mails:</b></p><ul>{% for smails in subdomain_mails %}<li>⇒ {{ smails }}</li>{% endfor %}</ul>
</div>
<hr />

<h3>DNS & SSL INFORMATION</h3>
<div class="section">
<p><b>(DNS) Name servers:</b> {{name_servers}}</p>
<p><b>(DNS) MX addresses:</b> {{mx_records}}</p>
<p><b>(SSL) Issuer:</b> {{issuer}}</p>
<p><b>(SSL) Subject:</b> {{subject}}</p>
<p><b>(SSL) Not before:</b> {{notBefore}}</p>
<p><b>(SSL) Not after:</b> {{notAfter}}</p>
<p><b>(SSL) Certificate name:</b> {{commonName}}</p>
<p><b>(SSL) Certificate serial number:</b> {{serialNumber}}</p>
</div>
<hr />

<h3>SERVICES & FRAMEWORKS INFORMATION</h3>
<div class="section">
<p><b>Web servers:</b></p><ul>{% for ws in web_servers %}<li>⇒ {{ ws }}</li>{% endfor %}</ul>
<p><b>CMS:</b></p><ul>{% for cm in cms %}<li>⇒ {{ cm }}</li>{% endfor %}</ul>
<p><b>Used programming languages:</b></p><ul>{% for pl in programming_languages %}<li>⇒ {{ pl }}</li>{% endfor %}</ul>
<p><b>Used web frameworks:</b></p><ul>{% for wf in web_frameworks %}<li>⇒ {{ wf }}</li>{% endfor %}</ul>
<p><b>Analytics service:</b></p><ul>{% for analytic in analytics %}<li>⇒ {{ analytic }}</li>{% endfor %}</ul>
<p><b>Used JavaScript frameworks:</b></p><ul>{% for jsf in javascript_frameworks %}<li>⇒ {{ jsf }}</li>{% endfor %}</ul>
<p><b>Tags:</b></p><ul>{% for tag in tags %}<li>⇒ {{ tag }}</li>{% endfor %}</ul>
<p><b>Common Platform Enumeration:</b></p><ul>{% for cpe in cpes %}<li>⇒ {{ cpe }}</li>{% endfor %}</ul>
</div>
<hr />

<h3>BASIC PRE-PENTEST INFORMATION</h3>
<div class="section">
<p><b>Open ports:</b></p><ul>{% for op in ports %}<li>⇒ {{ op }}</li>{% endfor %}</ul>
<p><b>Hostnames:</b></p><ul>{% for hn in hostnames %}<li>⇒ {{ hn }}</li>{% endfor %}</ul>
<p><b>Potential vulnerabilities:</b></p><ul>{% for vuln in vulns %}<li>⇒ {{ vuln }}</li>{% endfor %}</ul>
</div>
<hr />

<h3>DORKING SCAN INFO</h3>
<div class="section"><pre>{{ add_dsi | safe }}</pre></div>
<hr />

<h3>PAGESEARCH RESULTS</h3>
<div class="section">
<p><b>Amount of accessible subdomains:</b> {{ps_s}}</p>
<p><b>Amount of email addresses:</b> {{ps_e}}</p>
<p><b>Amount of found documents:</b> {{ps_f}}</p>
<p><b>Amount of found cookies:</b> {{ps_c}}</p>
<p><b>Amount of found API key:</b> {{ps_a}}</p>
<p><b>Amount of WEB elements found:</b> {{ps_w}}</p>
<p><b>Amount of exposed passwords found:</b> {{ps_p}}</p>
</div>
<hr />

<h3>VIRUSTOTAL API SCAN RESULTS</h3>
<div class="section"><pre>{{ virustotal_output }}</pre></div>
<hr />

<h3>SECURITYTRAILS API SCAN RESULTS</h3>
<div class="section"><pre>{{ securitytrails_output }}</pre></div>
<hr />

<h3>HUDSONROCK API SCAN RESULTS</h3>
<div class="section"><pre>{{ hudsonrock_output }}</pre></div>
<hr />

<p style="text-align:center;">Created by DPULSE (OSINT-TECHNOLOGIES)</p>
<ul style="text-align:center;list-style:none;padding:0;">
<li><a href="https://github.com/OSINT-TECHNOLOGIES">GitHub</a></li>
<li><a href="https://pypi.org/project/dpulse/">PyPI</a></li>
</ul>

</body>
</html>
70 changes: 70 additions & 0 deletions snapshotting/archive_snapshotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import requests
import os
import time
from colorama import Fore, Style
import sys
from config_processing import read_config

sys.path.append('service')
CDX_API = "http://web.archive.org/cdx/search/cdx"

def get_values_from_config():
    """Return the Wayback snapshotting settings as a pair of integers.

    Reads the configuration through ``read_config()`` and converts the
    ``wayback_retries_amount`` and ``wayback_requests_pause`` entries to
    ``int``.

    Returns:
        A ``(retries, pause_between_requests)`` tuple.
    """
    settings = read_config()
    return (
        int(settings['wayback_retries_amount']),
        int(settings['wayback_requests_pause']),
    )

def get_snapshots(url, from_date, to_date, timeout=60):
    """Query the Wayback Machine CDX API for captures of ``url``.

    Only captures with HTTP status 200 are requested, and the results are
    collapsed on the content digest, as set in the request parameters below.

    Args:
        url: Value sent as the CDX ``url`` parameter.
        from_date: Value sent as the CDX ``from`` parameter.
        to_date: Value sent as the CDX ``to`` parameter.
        timeout: Seconds to wait for the CDX server before giving up.
            Without a timeout, ``requests`` waits indefinitely on a stalled
            connection, which would hang the whole scan.

    Returns:
        The decoded JSON rows with the first row removed (in the CDX JSON
        output the first row carries the field names). Each remaining row
        holds the fields requested in ``fl``: timestamp, original, mime.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
    """
    params = {
        "url": url,
        "from": from_date,
        "to": to_date,
        "output": "json",
        "fl": "timestamp,original,mime",
        "filter": "statuscode:200",
        "collapse": "digest"
    }
    print(Fore.GREEN + f"Sending request to Wayback CDX API for {url}, period: {from_date} - {to_date}..." + Style.RESET_ALL)
    response = requests.get(CDX_API, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    # Slicing keeps an empty response ([]) safe: it simply yields [].
    return data[1:]

def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index):
    """Download one archived capture and save it as an HTML file.

    The capture is fetched from ``web.archive.org`` and written to
    ``<snapshot_storage_folder>/<index>_<timestamp>.html`` as UTF-8 text.
    The number of attempts comes from ``get_values_from_config()``.

    Args:
        snapshot_storage_folder: Existing directory that receives the file.
        timestamp: Capture timestamp used in the archive URL and file name.
        original_url: Originally captured URL, appended to the archive URL.
        index: Sequence number used in the file name and console messages.

    Returns:
        ``True`` when the capture was saved, ``False`` when every attempt
        failed.
    """
    retries, _ = get_values_from_config()
    # NOTE(review): the "id_" suffix presumably asks Wayback for the raw
    # capture without its page rewriting - confirm against Wayback docs.
    archive_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}"
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(archive_url, timeout=15)
            response.raise_for_status()
            filename = f"{index}_{timestamp}.html"
            filepath = os.path.join(snapshot_storage_folder, filename)
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(response.text)
            print(Fore.GREEN + f"[{index}] Downloaded: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{archive_url}" + Style.RESET_ALL)
            return True
        except Exception as e:
            # Deliberately broad: one broken capture must not abort the whole
            # download run. The reason is reported instead of being discarded.
            if attempt < retries:
                print(Fore.RED + f"[{index}] Attempt {attempt}/{retries} failed for {archive_url} ({e}). Retrying..." + Style.RESET_ALL)
                # Wait only when another attempt actually follows.
                time.sleep(2)
            else:
                print(Fore.RED + f"[{index}] Attempt {attempt}/{retries} failed for {archive_url} ({e})." + Style.RESET_ALL)
    print(Fore.RED + f"[{index}] Failed to download after {retries} attempts: {archive_url}" + Style.RESET_ALL)
    return False

def download_snapshot(short_domain, from_date, end_date, report_folder):
    """Fetch Wayback Machine captures of a domain into the report folder.

    Creates ``<report_folder>//wayback_snapshots``, asks ``get_snapshots``
    for the captures between ``from_date`` and ``end_date``, keeps those
    whose captured URL ends with ``.html`` or ``/`` or equals
    ``short_domain``, and downloads each one through ``snapshot_enum``,
    sleeping for the configured pause after every download.

    Args:
        short_domain: Domain passed to the CDX query and used in the filter.
        from_date: Start of the period, forwarded to ``get_snapshots``.
        end_date: End of the period, forwarded to ``get_snapshots``.
        report_folder: Folder in which the snapshot directory is created.

    Returns:
        None.
    """
    pause_between_requests = get_values_from_config()[1]
    snapshot_storage_folder = report_folder + '//wayback_snapshots'
    os.makedirs(snapshot_storage_folder, exist_ok=True)

    snapshots = get_snapshots(short_domain, from_date, end_date)
    highlight = Fore.LIGHTCYAN_EX + Style.BRIGHT
    print(Fore.GREEN + "Total snapshots found:" + Style.RESET_ALL + highlight + f" {len(snapshots)}" + Style.RESET_ALL)

    # Keep only rows that carry a URL field and look like an HTML page.
    html_snapshots = []
    for record in snapshots:
        if len(record) < 2:
            continue
        captured_url = record[1]
        if captured_url.endswith((".html", "/")) or captured_url == short_domain:
            html_snapshots.append(record)
    print(Fore.GREEN + "HTML snapshots to download:" + Style.RESET_ALL + highlight + f" {len(html_snapshots)}\n" + Style.RESET_ALL)

    if not html_snapshots:
        print(Fore.RED + "No HTML snapshots available for download." + Style.RESET_ALL)
        return

    for position, record in enumerate(html_snapshots):
        snapshot_enum(snapshot_storage_folder, record[0], record[1], position)
        time.sleep(pause_between_requests)
    print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL)