Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code Formatter and basic BrainyQuotes test added #26

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions pyquotes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__all__ = ["pyquotes", "brainyquote", "scrapper"]
42 changes: 21 additions & 21 deletions pyquotes/brainyquote.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@

def get_author_link(person):
    """Build the BrainyQuote author URL slug for *person*.

    The site keys author pages on a lowercased, underscore-joined form of
    the name, with periods also mapped to underscores, e.g.
    "A.P.J. Abdul Kalam" -> "a_p_j__abdul_kalam".

    :param person: Name of the author, e.g. "Albert Einstein".
    :return: Slug usable after "https://www.brainyquote.com/authors/".
    """
    # "_".join places separators only between words, replacing the original
    # manual counter that compared integers with "is not" -- an identity
    # check that only happens to work because of CPython small-int caching.
    author_url_link = "_".join(person.lower().split(" "))

    # BrainyQuote also replaces periods in initials with underscores.
    return author_url_link.replace(".", "_")

Expand All @@ -36,33 +36,33 @@ def get_quotes(person, category):
"""
URL = "https://www.brainyquote.com/authors/" + get_author_link(person)
respone_author = requests.get(URL)
soup_author = BeautifulSoup(respone_author.content, 'html5lib')
categories = soup_author.find_all('div', class_='kw-box')
soup_author = BeautifulSoup(respone_author.content, "html5lib")
sayanmondal2098 marked this conversation as resolved.
Show resolved Hide resolved
categories = soup_author.find_all("div", class_="kw-box")
check = False
count = 0
for i in categories:
a = i.text
replace = a.replace("\n", '')
replace = a.replace("\n", "")
r = replace.lower()
if category in r:
check = True
count += 1

# Getting the quote of the related author
get_quote = soup_author.find_all('a', attrs={'title': 'view quote'})
get_quote = soup_author.find_all("a", attrs={"title": "view quote"})
quote_list = []
big_list = []
for i in range(count):
quote_list.append(get_quote[i].text)
big_list.append(quote_list)

if len(quote_list) == 0:
return('''Oops! It seems that there are no quotes of the author of that
return """Oops! It seems that there are no quotes of the author of that
category.
\nYou may consider changing the category or the author ''')
\nYou may consider changing the category or the author """
quote_list.append(person)

return(quote_list)
return quote_list


def get_quote(person, category):
    """Return one random quote by *person* matching *category*.

    :param person: Name of the author, e.g. "Albert Einstein".
    :param category: Category keyword, e.g. "life".
    :return: Tuple of ``(quote, person)``, or a message string when no
        quotes were found for that category.
    """
    quotes = get_quotes(person, category)
    if len(quotes) == 0:
        # In case no quote of the author exists for that category.
        return "No quotes found of that category"

    index = random.randint(0, len(quotes) - 1)
    # NOTE(review): get_quotes() appends the author's name to the list it
    # returns, so the random pick can land on the name itself -- confirm
    # whether that is intended.
    # (Renamed the result away from "list", which shadowed the builtin.)
    return (quotes[index], person)


def get_quote_of_the_day():
Expand All @@ -99,26 +99,26 @@ def get_quote_of_the_day():
# Sending a HTTP request to the specified URL and saving the response
# from server in a response object called response.
response = requests.get(URL)
soup = BeautifulSoup(response.content, 'html5lib')
a_tags = soup.findAll('img', alt=True)
soup = BeautifulSoup(response.content, "html5lib")
a_tags = soup.findAll("img", alt=True)

# Getting all the a tags of the page.
quote_of_the_day_atag = str(a_tags[0])

# Grabbing the first a tag of the page
matches = re.findall(r'\"(.+?)\"', quote_of_the_day_atag)
matches = re.findall(r"\"(.+?)\"", quote_of_the_day_atag)

# A regular expression which gives a list of all
# text that is in between quotes.
quote_author_split_list = str(matches[0]).split('-')
quote_author_split_list = str(matches[0]).split("-")

# Get a list of quote_of_the_day and the author
quote_of_the_day = matches[0].replace(quote_author_split_list[-1], '')
quote_of_the_day = quote_of_the_day.replace('-', '')
quote_of_the_day = matches[0].replace(quote_author_split_list[-1], "")
quote_of_the_day = quote_of_the_day.replace("-", "")
author_name = quote_author_split_list[-1]

# Gives the author_name
author_name = author_name.replace(' ', '')
author_name = author_name.replace(" ", "")

# Removes any extra space
return (quote_of_the_day, author_name)
6 changes: 2 additions & 4 deletions pyquotes/pyquotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
# 2. Get the quote of the day


def get_quotes(person: (None, str) = None,
category: (None, str) = None):
def get_quotes(person: (None, str) = None, category: (None, str) = None):
"""
This function returns all the quotes that matches the input.

Expand All @@ -16,8 +15,7 @@ def get_quotes(person: (None, str) = None,
pass


def get_quote(person: (None, str) = None,
category: (None, str) = None):
def get_quote(person: (None, str) = None, category: (None, str) = None):
"""
This function take a category and a person as a input and returns
a random quote which matches the input.
Expand Down
95 changes: 61 additions & 34 deletions pyquotes/scrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
# 2. Get the quote of the day


def get_quotes(person: (None, str) = None,
category: (None, str) = None):
def get_quotes(person: (None, str) = None, category: (None, str) = None):
"""
This function returns all the quotes that matches the input.
:param person: Name of the person e.g. Albert Einstein
Expand All @@ -22,8 +21,7 @@ def get_quotes(person: (None, str) = None,
return crawler(person, category)


def get_quote(person: (None, str) = None,
category: (None, str) = None):
def get_quote(person: (None, str) = None, category: (None, str) = None):
"""
This function take a category and a person as a input and returns
a random quote which matches the input.
Expand All @@ -33,7 +31,7 @@ def get_quote(person: (None, str) = None,
"""
quotes_and_authors = crawler(person, category)
if len(quotes_and_authors) > 1:
index = random.randint(0, len(quotes_and_authors)-1)
index = random.randint(0, len(quotes_and_authors) - 1)
else:
index = 0
return quotes_and_authors[index]
Expand All @@ -46,52 +44,65 @@ def get_quote_of_the_day():
"""
page_number = random.randint(1, 912)
test = 1
url = "https://api.quotery.com/wp-json/quotery/v1/quotes?orderby=popular&page=" + \
str(page_number)+"&per_page=120"
url = (
"https://api.quotery.com/wp-json/quotery/v1/quotes?orderby=popular&page="
+ str(page_number)
+ "&per_page=120"
)
quote, authors, test = scraper(url, test)
quotes_and_authors = selection_general(quote, authors)
index = random.randint(0, len(quotes_and_authors)-1)
index = random.randint(0, len(quotes_and_authors) - 1)
return quotes_and_authors[index]


def scraper(url, test):
authors = []
quotes = []
# Used a header to fake a browser
source = requests.get(url, headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}).text
soup = BeautifulSoup(source, 'lxml')
source = requests.get(
url,
headers={
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
},
).text
soup = BeautifulSoup(source, "lxml")
para = soup.p.text
# all these random split is how the data was arranged in the source from which it had to be stripped
para = para.split("\"quotes\"")
para = para.split('"quotes"')
if len(para) > 1:
para = para[1]
else:
para = para[0]
count = 0
para = para.split("\"status\"")
para = para.split('"status"')
if len(para) == 1:
para = para[0]
quote_list = para.split("\"body\"")
quote_list = para.split('"body"')
for index, element in enumerate(quote_list):
quote_element = element.split("\"images\"")
quote_element = element.split('"images"')
if index > 0:
for inner_index, quote in enumerate(quote_element):
if inner_index == 0:
# Cleaning the quote and using the encode decode to remove Unicode escape chracters
cleaned_quote = quote[2:len(
quote)-2].encode('utf-8').decode('unicode-escape')
cleaned_quote = (
quote[2: len(quote) - 2]
.encode("utf-8")
.decode("unicode-escape")
)
quotes.append(cleaned_quote)

author_list = para.split("\"name\"")
author_list = para.split('"name"')
for index, element in enumerate(author_list):
author_element = element.split("\"slug\"")
author_element = element.split('"slug"')
if index > 0:
for inner_index, author in enumerate(author_element):
if inner_index == 0:
# Cleaning the name of the author and using the encode decode to remove Unicode escape chracters
cleaned_author = author[2:len(
author)-2].encode('utf-8').decode('unicode-escape')
cleaned_author = (
author[2: len(author) - 2]
.encode("utf-8")
.decode("unicode-escape")
)
authors.append(cleaned_author)
count += 1
else:
Expand Down Expand Up @@ -123,13 +134,13 @@ def crawler(user_author=None, user_topic=None):
authors = []
quote = []
quotes_and_authors = []
punctuations = (",", "-", "'", ".", '"', '_', '\\', '“', '”', '*')
punctuations = (",", "-", "'", ".", '"', "_", "\\", "“", "”", "*")
new_word = ""
expected_author = []
expected_topic = []

file_topic = open('topics.txt', 'r')
file_author = open('authors.txt', 'r')
file_topic = open("topics.txt", "r")
file_author = open("authors.txt", "r")

# Using FuzzyWuzzy to match input entry to the entries in the website. We are using text files from scrapped from the website for it.

Expand All @@ -150,18 +161,24 @@ def crawler(user_author=None, user_topic=None):
for index, char in enumerate(topic):
if char not in punctuations:
new_word = new_word + char
new_word = '-'.join(new_word.split(" "))
new_word = "-".join(new_word.split(" "))
i = 1
test = 1
while test:
url = ("https://api.quotery.com/wp-json/quotery/v1/quotes?topic="
+ new_word + "&page=" + str(i) + "&per_page=120")
url = (
"https://api.quotery.com/wp-json/quotery/v1/quotes?topic="
+ new_word
+ "&page="
+ str(i)
+ "&per_page=120"
)
new_word = ""
i += 1
for author in expected_author:
quote, authors, test = scraper(url, test)
quotes_and_authors += selection_author(
quote, authors, author)
quote, authors, author
)
return quotes_and_authors
else:
# show random quotes from the topic
Expand All @@ -173,9 +190,14 @@ def crawler(user_author=None, user_topic=None):
for index, char in enumerate(topic):
if char not in punctuations:
new_word = new_word + char
new_word = '-'.join(new_word.split(" "))
url = ("https://api.quotery.com/wp-json/quotery/v1/quotes?topic="
+ new_word + "&page=" + str(i) + "&per_page=120")
new_word = "-".join(new_word.split(" "))
url = (
"https://api.quotery.com/wp-json/quotery/v1/quotes?topic="
+ new_word
+ "&page="
+ str(i)
+ "&per_page=120"
)
new_word = ""
i += 1
quote, authors, test = scraper(url, test)
Expand All @@ -195,10 +217,15 @@ def crawler(user_author=None, user_topic=None):
for index, char in enumerate(author):
if char not in punctuations:
new_word = new_word + char
new_word = '-'.join(new_word.split(" "))
new_word = "-".join(new_word.split(" "))
while test:
url = ("https://api.quotery.com/wp-json/quotery/v1/quotes?author="
+ new_word + "&page=" + str(i) + "&per_page=120")
url = (
"https://api.quotery.com/wp-json/quotery/v1/quotes?author="
+ new_word
+ "&page="
+ str(i)
+ "&per_page=120"
)
i += 1
quote, authors, test = scraper(url, test)
quotes_and_authors += selection_general(quote, authors)
Expand Down
21 changes: 21 additions & 0 deletions tests/codeformattest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
import unittest
import pep8


class TestCodeFormat(unittest.TestCase):
    """Project-wide PEP 8 conformance check."""

    def test_pep8_conformance(self):
        """Test that every .py file in the project conforms to PEP 8.

        The original version re-assigned ``errors`` on every ``os.walk``
        iteration, so only the files in the *last* visited directory were
        actually asserted on.  Collect all Python files first and run a
        single check over the whole set instead.
        """
        style = pep8.StyleGuide(quiet=False)
        style.options.max_line_length = 120
        python_files = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(".")
            for name in files
            if name.endswith(".py")
        ]
        total_errors = style.check_files(python_files).total_errors
        self.assertEqual(total_errors, 0, "PEP8 style errors: %d" % total_errors)


if __name__ == "__main__":
    unittest.main()
13 changes: 13 additions & 0 deletions tests/test_brainyquotes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import requests


class TestBrainyQuotes:
    """Smoke tests for brainyquote.com availability."""

    def test_brainyquote_up(self):
        """The BrainyQuote homepage should answer a HEAD request with 200.

        The original method was named ``TestBrainyQuotesUp`` (so pytest
        never collected it), took no ``self``, printed "Website Up" before
        checking anything, and returned a bool instead of asserting.
        """
        response = requests.head("https://www.brainyquote.com/")
        assert response.status_code == 200


if __name__ == "__main__":
    TestBrainyQuotes().test_brainyquote_up()