Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions modules/headline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from bs4 import BeautifulSoup
from textblob.classifiers import NaiveBayesClassifier
from textblob import TextBlob
import newspaper
import nltk

class title:

Expand All @@ -12,16 +14,18 @@ def __init__(self):

def extract_headline(self):
self.net_con=True #Expecting Internet Connection to be working initially

try:
news_page=urllib.request.urlopen(self.news_url)
soup = BeautifulSoup(news_page,'html.parser')
headline_in_html=soup.find('h1')
headline=headline_in_html.text.strip()
return headline

article = newspaper.Article(self.news_url)
article.download()
article.parse()

except urllib.error.URLError:
print("\nCONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet")
self.net_con=False #Value update if the program is unable to connenct
article.title = "Invalid URL/Could not extract title"
return article.title.strip()



#Adding Training Data
Expand Down