-
Notifications
You must be signed in to change notification settings - Fork 5
/
gen.py
83 lines (68 loc) · 3.37 KB
/
gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import sys
import tldextract
import argparse
def combinations(words, symbol):
    """Return the cumulative prefixes of *words* split on *symbol*.

    ``combinations("a.b.c", ".")`` gives ``["a", "a.b", "a.b.c"]``.

    A string that does not contain *symbol* (a single segment) yields an
    empty list, and empty prefixes (produced when *words* starts with
    *symbol*) are dropped.
    """
    segments = words.split(symbol)

    # A lone segment is deliberately not reported on its own.
    if len(segments) == 1:
        return []

    prefixes = []
    for end in range(1, len(segments) + 1):
        prefix = symbol.join(segments[:end])
        if prefix:
            prefixes.append(prefix)
    return prefixes
def concat_extensions(results, extensions):
    """Append every extension to every usable entry of *results*.

    Entries that are empty or that start with "-" or "." are skipped.
    The output is ordered by entry first, then by extension.
    """
    extended_results = []
    for stem in results:
        # Skip names that start with a separator, and empty names.
        if stem.startswith(("-", ".")) or not stem:
            continue
        for suffix in extensions:
            extended_results.append(stem + suffix)
    return extended_results
def parse_url(url, extensions, full_url=False):
    """Build candidate file names derived from the host name in *url*.

    The host is split with ``tldextract``. Cumulative prefixes (see
    ``combinations``) are collected, in dot-separated and dash-separated
    spellings, for:

    * the fully qualified domain name and the subdomain;
    * the same names prefixed with ``OLD.`` / ``OLD-``.

    The registered domain, its first label and its dashed form are added
    too. Each word is then joined with each entry of *extensions*.

    Args:
        url: The URL or host name to derive words from.
        extensions: Extensions to append to each word (e.g. ``".zip"``).
        full_url: When true and *url* has non-empty text before a ":",
            each word is emitted as ``<url>/<word>`` instead of bare.

    Returns:
        A list of strings; it may contain duplicates.
    """
    parsed = tldextract.extract(url)
    subdomain = parsed.subdomain
    fqdn = parsed.fqdn
    registered = parsed.registered_domain

    def dotted(name):
        # Treat dashes as dots, then take the cumulative dotted prefixes.
        return combinations(name.replace("-", "."), ".")

    def dashed(name):
        # Treat dots as dashes, then take the cumulative dashed prefixes.
        return combinations(name.replace(".", "-"), "-")

    words = [
        *dotted(fqdn),
        *dashed(fqdn),
        *dotted("OLD." + fqdn),
        *dashed("OLD-" + subdomain),
        *dotted("OLD." + subdomain),
        *dashed("OLD-" + fqdn),
        *dotted(subdomain),
        *dashed(subdomain),
        registered.split(".")[0],
        registered,
        registered.replace(".", "-"),
    ]

    # Anything before the first ":" counts as a scheme (kept from the
    # original heuristic; "host:port" therefore also qualifies).
    scheme = url.split(":")[0] if ":" in url else ""

    if full_url and scheme:
        # Filter BEFORE prefixing: once a word is prefixed with the URL,
        # the empty/"."/"-" filter in concat_extensions can no longer
        # match, which used to emit entries such as "http://host/.zip"
        # when the registered domain is empty (presumably e.g. for bare
        # IP addresses - confirm against tldextract).
        # Trailing slashes are removed so the join never yields "//word".
        base = url.rstrip("/")
        words = [
            f"{base}/{word}"
            for word in words
            if word and not word.startswith(("-", "."))
        ]

    return concat_extensions(words, extensions)
def get_extensions(ext_filepath):
    """Load file extensions, one per line, from *ext_filepath*.

    Surrounding whitespace is stripped and blank lines are ignored.
    Every returned extension starts with "."; one is added when missing.

    Args:
        ext_filepath: Path of the text file listing the extensions.

    Returns:
        A list of extensions in file order, e.g. ``[".zip", ".bak"]``.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(ext_filepath, 'r') as ext_file:
        lines = ext_file.read().splitlines()

    extensions = []
    for line in lines:
        ext = line.strip()
        # Blank lines previously crashed the ``ext[0]`` check (IndexError).
        if not ext:
            continue
        extensions.append(ext if ext.startswith('.') else '.' + ext)
    return extensions
def main():
    """Command-line entry point.

    Reads the URL file and the extension file named on the command line
    and prints, for each non-blank URL line, the generated words
    de-duplicated and sorted, one per line.
    """
    # Command-line interface: two positional file paths and one flag.
    cli = argparse.ArgumentParser(description='URL Wordlist Generator')
    cli.add_argument('url_file', type=str, help='The file path for the URLs')
    cli.add_argument('ext_file', type=str, help='The file path for the extensions')
    cli.add_argument('--full', action='store_true', help='Include full urls with http/https scheme')
    options = cli.parse_args()

    suffixes = get_extensions(options.ext_file)

    with open(options.url_file, 'r') as handle:
        for raw_line in handle:
            target = raw_line.strip()
            # Blank lines in the URL file are ignored.
            if not target:
                continue
            unique_words = set(parse_url(target, suffixes, options.full))
            print("\n".join(sorted(unique_words)))
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()