#!/usr/bin/env python3
# DIRLISTer - Directory Listing Downloader
# September 13th, 2024 01:42 AM WIB
import os
import sys
import signal
import concurrent.futures
from datetime import datetime
from urllib.parse import urlparse, urlunparse, unquote

import requests
import bs4
reset = "\033[0m"
red = "\033[91m"
green = "\033[92m"
yellow = "\033[93m"
grey = "\033[90m"
has_been_processed_urls = []
local_dl_path = os.getcwd() + "/dirlister_result_files/"
stop = False
all_scraped_urls = set()
def banner():
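    """Clear the terminal and print the DIRLISTer banner."""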
    os.system("clear")
    banner_text = f"""
{green}█▀▄ █ █▀█ █░░ █ █▀ ▀█▀{reset} █▀▀ █▀█
{green}█▄▀ █ █▀▄ █▄▄ █ ▄█ ░█░{reset} ██▄ █▀▄
{green}|
{green}├───{reset} Directory
{green}│ └────── {reset}Listing
{green}└──────────────────────────{reset} Downloader
{green}Current Download Path:{reset} {local_dl_path}
"""
    print(banner_text)
def keyboard_interrupt_handler(sig, frame):
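    """SIGINT handler installed before the download pool starts; exits immediately."""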
    print(f"\n[{yellow}WRN{reset}] KeyboardInterrupt detected, stopping process")
    sys.exit(0)
def create_folder_for(target_url):
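    """Mirror the remote directory structure under local_dl_path and return (final_path, full_path)."""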
    path = target_url
    paths = local_dl_path.split("/") + [p for p in path.split("/")[:-1] if p]
    full_path = "/".join(local_dl_path.split("/") + [p for p in path.split("/") if p])
    final_path = "/".join(paths)
    # os.makedirs replaces the original mkdir loop and tolerates already-existing folders
    os.makedirs(final_path, exist_ok=True)
    return final_path, full_path
def download_file(remote_file_path, use_ssl=True):
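    """Download a single remote file into its mirrored local folder and return a status message."""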
    file_name = remote_file_path.split("/")[-1]
    target_path = create_folder_for(remote_file_path[remote_file_path.find(":") + 1:])
    timestamps = datetime.now().strftime("%H:%M:%S")
    if file_name not in os.listdir(target_path[0]):
        try:
            file = requests.get(remote_file_path, verify=use_ssl, stream=True)
            content_length = round(int(file.headers.get('content-length', 0)) / 1024, 2)  # in KB
            if file.status_code == 200 and content_length > 0:
                with open(os.path.join(target_path[0], file_name), 'wb') as saved_file:
                    for content_chunk in file.iter_content(chunk_size=1024):
                        if content_chunk:
                            saved_file.write(content_chunk)
                dl_message = f"[{green}{timestamps}{reset}] Download file {green}{file_name}{reset} ({file.headers.get('content-type', '-')}) {round(content_length / 1024, 2)} MB {green}OK{reset}"
            else:
                dl_message = f"[{red}{timestamps}{reset}] Download file {red}{remote_file_path}{reset} failed. Code: {red}{file.status_code}{reset}, perhaps forbidden access or an empty file? 🧐"
        except Exception as e:
            dl_message = f"[{red}{timestamps}{reset}] Download file {red}{remote_file_path}{reset} failed with error: {red}{e}{reset}"
    else:
        dl_message = f"[{yellow}{timestamps}{reset}] File {yellow}{file_name}{reset} already exists in download folder {grey}{local_dl_path}{reset}. {yellow}SKIP{reset}"
    return dl_message
def get_all_url_from(url, file_extensions=[], use_ssl=True):
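    """Fetch a directory-listing page and return its links as [{'is_file': bool, 'url': str}, ...]."""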
    all_urls = []
    all_urls_json = []
    # If the URL does not serve HTML, treat it as a direct file link
    check_header = requests.head(url, verify=use_ssl, allow_redirects=True)
    if 'text/html' not in check_header.headers.get('Content-Type', ''):
        all_urls_json.append({"is_file": True, "url": url})
        all_urls.append(url)
    else:
        response = requests.get(url, verify=use_ssl, allow_redirects=True)
        host_url = urlparse(response.url)
        dir_list_base_url = host_url.scheme + "://" + host_url.netloc
        page_text = response.text.lower()
        if response.status_code == 200 and ("index of" in page_text or "directory listing" in page_text):
            html = bs4.BeautifulSoup(response.text, 'html.parser')
            a_tags = html.find_all('a')
            for a in a_tags:
                href = a.get('href') if a.text.lower() not in ["parent directory", "name", "last modified", "size", "description", "../", ".."] else ""
                if href:
                    if host_url.path not in href:
                        href = "/".join([i for i in host_url.path.split("/") if i] + [i for i in href.split("/") if i])
                    is_file = bool(os.path.splitext(href.lower())[1].replace(".", "")) and not href.endswith("/")
                    full_href = href if href.startswith(dir_list_base_url) else urlunparse((host_url.scheme, host_url.netloc, href, '', '', ''))
                    path = urlparse(full_href).path
                    if full_href.startswith(dir_list_base_url) and path != "/":
                        if full_href not in all_urls:
                            all_urls_json.append({'is_file': is_file, 'url': unquote(full_href)})
                            all_urls.append(full_href)
    return all_urls_json
def perform_scrape_url(url, file_extensions=[], use_ssl=True):
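    """Recursively walk a directory listing, collecting file URLs into all_scraped_urls."""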
    global stop
    path = urlparse(url)
    if not stop:
        print(f"[{green}INF{reset}] 📂 Remote folder found: {green}{path.path}{reset}, Scanning..")
        urls = get_all_url_from(url, file_extensions=file_extensions, use_ssl=use_ssl)
        if urls:
            print(f"    🌐 Found {len(urls)} folder/file URLs from {green}{path.path}{reset}")
            for item in urls:
                try:
                    if item['is_file']:
                        all_scraped_urls.add(item['url'])
                    else:
                        if item['url'] not in has_been_processed_urls:
                            has_been_processed_urls.append(item['url'])
                            perform_scrape_url(item['url'], file_extensions=file_extensions, use_ssl=use_ssl)
                except Exception:
                    stop = True
                    break
def main(url):
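    """Check the target URL, ask for confirmation where needed, then scrape and download concurrently."""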
    use_ssl = True
    print(f"[{green}INF{reset}] Checking connection to {green}{url}{reset}..")

    def ask_q(question):
        answer = input(question + f" {yellow}y{reset}/{yellow}N{reset}: ")
        return answer.lower() == "y"

    try:
        check = requests.get(url, verify=use_ssl, allow_redirects=True)
        code = check.status_code
        is_directory_listing = "index of" in check.text.lower() or "directory listing" in check.text.lower()
        if code == 200:
            print(f"[{green}INF{reset}] Connection established!")
            continue_ = True
        else:
            continue_ = ask_q(f"[{yellow}WRN{reset}] The server responded with code {code}, are you sure you want to continue?")
        if not is_directory_listing:
            continue_ = ask_q(f"[{yellow}WRN{reset}] The given URL does not seem to contain 'Index Of' or a similar string that would indicate a directory listing. Are you sure you want to continue?")
    except Exception as err:
        if "SSLError" in str(err):
            print(f"[{yellow}WRN{reset}] SSL error occurred. Retrying requests with SSL verification disabled..")
            continue_ = True
            use_ssl = False
        else:
            continue_ = ask_q(f"[{yellow}WRN{reset}] Failed connecting to the server with error {green}{err}{reset}. Are you sure you want to continue?")
    if continue_:
        exclude_file_extensions = input(f"[{green}INP{reset}] Input file extensions to exclude, separated by commas; leave blank to include all extensions. {green}example:{reset} zip,jpg,mp4: ")
        exclude_file_extensions = [ext.strip().lower() for ext in exclude_file_extensions.split(",") if ext.strip()]
        perform_scrape_url(url, use_ssl=use_ssl)
        urls_without_excluded_ext = [i for i in all_scraped_urls if os.path.splitext(i)[1].replace(".", "") not in exclude_file_extensions]
        print(f"[{green}INF{reset}] ℹ️ Scraping URL process done with {green}{len(all_scraped_urls)}{reset} URLs")
        if exclude_file_extensions:
            print(f"    {yellow}{len(urls_without_excluded_ext)}{reset} URLs left after excluding extensions.")
        print(f"[{green}INF{reset}] 📥 Download process starting..")
        max_threads = 10
        download_worker = None
        # Install the SIGINT handler so Ctrl+C works while the thread pool runs
        signal.signal(signal.SIGINT, keyboard_interrupt_handler)
        try:
            download_worker = concurrent.futures.ThreadPoolExecutor(max_workers=max_threads)
            download_process = [download_worker.submit(download_file, url, use_ssl) for url in urls_without_excluded_ext]
            indent = 8
            n = 1
            for finished_download_process in concurrent.futures.as_completed(download_process):
                spaces = " " * (indent - len(str(n)))
                print(f"{spaces}{grey}{n}{reset} {finished_download_process.result()}")
                n += 1
        finally:
            if download_worker:
                download_worker.shutdown(wait=True)
        print(f"[{green}INF{reset}] ✅ Process finished at {green}{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}{reset}!")
def dirlister():
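    """Entry point: show the banner, prompt for a target URL, and hand off to main()."""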
    banner()
    try:
        url = input(f"[{green}INP{reset}] Input your directory listing URL: ")
        while not url:
            url = input(f"[{yellow}WRN{reset}] Please input the target directory URL, e.g. {green}https://yourtargetsite.com/uploads{reset}, CTRL + C to exit: ")
        url_scheme = urlparse(url).scheme
        if not url_scheme:
            url = "https://" + url.strip()
        main(url.strip())
    except KeyboardInterrupt:
        exit(0)
if __name__ == "__main__":
    dirlister()
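
# Usage (a typical run, assuming Python 3 and the `requests` and `beautifulsoup4`
# packages are installed):
#   $ python3 dirlister.py
# The script prompts for a directory-listing URL and mirrors everything it finds
# under ./dirlister_result_files/.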