diff --git a/README.md b/README.md index 79bde46..8baab20 100644 --- a/README.md +++ b/README.md @@ -17,19 +17,35 @@ The syntax for the script is the following: **release0_1 \[command] [arg]** # Commands -url: pass a url as an argument to test for. This only tests the argument link, and is not recursive. -example: **release0_1 url https://www.youtube.com/watch?v=zILpjFqlOak** +## url +**url**: pass a url as an argument to test for. This only tests the argument link, and is not recursive. + + example: **release0_1 url https://www.youtube.com/watch?v=zILpjFqlOak** -file: pass a file as an argument to test links inside. If the file has multiple links, it will test those as well. +**--l**: an option for the **url** command that allows you to search through a website for dead links given the url + + example: **release0_1 url --l https://www.youtube.com/watch?v=zILpjFqlOak** -example: **release0_1 file test.html** +**--s**: an option for the **url** command that allows you to search through a website for http links and see if they work as https + + example: **release0_1 url --s https://www.youtube.com/watch?v=zILpjFqlOak** -version: returns you the version of this code +## file -example: **release0_1 version** +**file**: pass a file as an argument to test links inside. If the file has multiple links, it will test those as well. 
---help: gives you the lists of commands for help + example: **release0_1 file test.html** -example: **release0_1 --help** +**--s**: an option for the **file** command that allows you to search through a file for http links and see if they work as https + + example: **release0_1 file --s C:\Users\user1\Documents\file.html** + +**version**: returns you the version of this code + + example: **release0_1 version** + +**--help**: gives you the lists of commands for help + + example: **release0_1 --help** diff --git a/release0_1.py b/release0_1.py index 534510e..6efb68f 100644 --- a/release0_1.py +++ b/release0_1.py @@ -1,31 +1,53 @@ import click import urllib3 import re +# check this with Click documentation from colorama import Fore import sys -def basic_file_read(file, *args): +def get_urls(data, *args): s = args[0] + pattern = re.findall(r'https?:[a-zA-Z0-9_.+-/#~]+', data) + for l in pattern: + q = l.strip() + test_request(q) + if (s): + isHttp = re.match('(http)', q) + if (isHttp): + q = re.sub('(http)', 'https', q) + test_request(q) + + +def website_read(q, s): try: - file_data = open(file,'r',encoding="utf-8") - pattern = re.findall(r'https?:[a-zA-Z0-9_.+-/#~]+', file_data.read()) - for l in pattern: - q = l.strip() - test_request(q) - if(s): - isHttp = re.match('(http)', q) - if(isHttp): - q = re.sub('(http)','https', q) - test_request(q) + h = urllib3.PoolManager() + response = h.request('GET', q, timeout=5.0) + except: + print("This is an invalid link, please try again") + else: + try: + get_urls(response.data.decode('ISO-8859-1'), s) + except: + print("An error has occurred when retrieving the website") + + +def basic_file_read(file, *args): + try: + file_data = open(file, 'r', encoding="utf-8") + get_urls(file_data.read(), *args) except OSError: print("The file cannot be opened! 
Make sure this file can be read and is legit.") + def test_request(q): + # accepts one url + # try adding more support for other html codes try: h = urllib3.PoolManager() req = h.request('HEAD', q) - if(req.status == 200): + if (req.status == 200): + # this is not working with my machine. Perhaps check Click documentation and see what is going on with this print(Fore.GREEN + f"{q} passes with {req.status}!") elif (req.status == 403): print(Fore.WHITE + f"{q} looks sus, it returned {req.status}") @@ -34,10 +56,12 @@ def test_request(q): except: print("Unknown Error: " + str(sys.exc_info()[0])) + @click.group() def cli(): pass + @cli.command('file') @click.argument('file') @click.option('--s', is_flag=True, default=False, help='Optional flag to check if https can be used instead of http') @@ -45,11 +69,19 @@ def file_reader(file, s): """this reads URL links from a file!""" basic_file_read(file, s) + @cli.command('url') @click.argument('url') -def url_reader(url): +@click.option('--l', is_flag=True, default=False, help='look through the given website recursively and check the webpage for ' + 'dead links') +@click.option('--s', is_flag=True, default=False, help='Optional flag to check if https can be used instead of http') +def url_reader(url, l, s): """this reads a URL that you pass as an argument!""" - test_request(str(url)) + if l: + website_read(url, s) + else: + test_request(str(url)) + @cli.command('version') def version_check(): diff --git a/setup.py b/setup.py index 0df9c72..e3aba86 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,12 @@ from setuptools import setup + setup( -name = 'release0_1', -version = '0.1', -install_requires = ['Click','urllib3','colorama'], -py_modules = ['release0_1'], + name='release0_1', + version='0.1', + install_requires=['Click', 'urllib3', 'Colorama'], + py_modules=['release0_1'], -entry_points={ -'console_scripts': -['release0_1=release0_1:cli']} + entry_points={ + 'console_scripts': + ['release0_1=release0_1:cli']} )