linkfinder.py
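"""linkfinder.py: extract links (or HTML comments) from one or more pages.

Example invocations, inferred from the flags defined below (the URLs and
file names here are placeholders):

    python linkfinder.py --url http://example.com
    python linkfinder.py --url http://example.com --comments -o comments.txt
    python linkfinder.py -f urls.txt
"""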
from bs4 import BeautifulSoup, Comment
import argparse
import requests
import os

parser = argparse.ArgumentParser()
parser.add_argument('--url', help='The target URL')
parser.add_argument('-f', help='A file with one URL per line')
parser.add_argument('-o', help='The output file')
parser.add_argument('--comments', help='Grab the HTML comments on the page', action='store_true')
args = parser.parse_args()
if args.f:
    # Resolve the input path first, since we chdir into the output folder below.
    args.f = os.path.abspath(args.f)
    os.makedirs('LinkFinder', exist_ok=True)
    os.chdir('LinkFinder')
    with open(args.f, 'r') as urls:
        for url in urls:
            url = url.strip()
            try:
                resp = requests.get(url).text
                soup = BeautifulSoup(resp, 'html.parser')
                # Derive an output filename from the URL by stripping the scheme.
                name = url.replace('http://', '').replace('https://', '')
                if args.comments:
                    print(f'{"=" * 20} {url} {"=" * 20}\n')
                    with open(name + '_comments', 'a+') as output:
                        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
                            print(comment, '\n')
                            output.write(comment + '\n')
                else:
                    with open(name, 'a+') as output:
                        for link in soup.find_all('a', href=True):
                            print(link['href'])
                            output.write(link['href'] + '\n')
            except requests.exceptions.ConnectionError:
                print('Connection refused at', url)
            except Exception as err:
                print('Something went wrong!')
                print(err)
elif args.url:
    try:
        resp = requests.get(args.url).text
        soup = BeautifulSoup(resp, 'html.parser')
        if args.o and args.comments:
            print(f'{"=" * 20} {args.url} {"=" * 20}\n')
            with open(args.o, 'a+') as output:
                for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
                    print(comment, '\n')
                    output.write(comment + '\n')
        elif args.o:
            with open(args.o, 'w+') as output:
                for link in soup.find_all('a', href=True):
                    print(link['href'])
                    output.write(link['href'] + '\n')
        else:
            for link in soup.find_all('a', href=True):
                print(link['href'])
    except requests.exceptions.ConnectionError:
        print('Connection refused at', args.url)
    except Exception as err:
        print('Something went wrong!')
        print(err)
else:
    # Neither --url nor -f was supplied; show usage.
    parser.print_help()