1- import urllib .request
2- from textblob .classifiers import NaiveBayesClassifier
3- from textblob import TextBlob
41import newspaper
2+ import nltk
3+ from nltk .classify import NaiveBayesClassifier
4+ from nltk .classify .util import accuracy
5+ import urllib .request
56import sys
67
class title:
    """Fetch a news article, extract its headline and classify its sentiment.

    The article URL is read interactively from standard input when the object
    is created. Sentiment is predicted by an NLTK Naive Bayes classifier
    trained on ``positive_headlines.csv`` and ``negative_headlines.csv``,
    both opened relative to the current working directory.
    """

    # Initialisations
    def __init__(self):
        """Prompt for the article URL and check that it can be opened.

        Exits the process through ``sys.exit()`` when the URL is malformed or
        cannot be reached.
        """
        self.news_url = input("\n Enter The URL : ")
        self.pos = []  # Labelled feature sets built from positive_headlines.csv
        self.neg = []  # Labelled feature sets built from negative_headlines.csv
        # Parsed newspaper article; filled in by extract_headline() so that
        # headline_category() can report the authors.
        self.article = None

        try:
            # The response stays on the instance as a public attribute; nothing
            # in this class reads it, the call only serves as a reachability
            # check. ``urllib.error`` is loaded as a side effect of importing
            # ``urllib.request``.
            self.news_page = urllib.request.urlopen(self.news_url)
        except urllib.error.URLError:
            print("\n CONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet")
            sys.exit()
        except ValueError:
            # urlopen raises ValueError for strings that are not URLs at all
            # (for example a missing scheme).
            print("\n URL ERROR: The text entered is not a valid URL.")
            sys.exit()

    # extract headline
    def extract_headline(self):
        """Download and parse the article and return its stripped title.

        The parsed article is kept on ``self.article``. Returns ``None``
        after printing a message when newspaper cannot retrieve the article.
        """
        try:
            article = newspaper.Article(self.news_url)
            article.download()
            article.parse()
        except newspaper.article.ArticleException:
            print("\n CONNECTION/URL ERROR: Article could not be retrieved.")
            return None

        self.article = article
        return article.title.strip()

    @staticmethod
    def _features(text):
        """Return the bag-of-words feature dict ``{token: True}`` for *text*."""
        return {word: True for word in nltk.word_tokenize(text)}

    def _load_labelled(self, path, label):
        """Read one headline per line from *path* and pair each with *label*.

        Blank lines are skipped: they would produce empty feature sets that
        carry no words and only shift the label frequencies.
        """
        with open(path) as file:
            return [[self._features(line), label] for line in file if line.strip()]

    # Adding Training/Testing Data
    def train(self, headline):
        """Train the classifier from the two CSV files and classify *headline*.

        ``self.pos`` and ``self.neg`` are rebuilt on every call, so calling
        this method repeatedly does not duplicate the training data.

        Returns the predicted label, ``'Positive'`` or ``'Negative'``.
        """
        self.pos = self._load_labelled("positive_headlines.csv", 'Positive')
        self.neg = self._load_labelled("negative_headlines.csv", 'Negative')

        classifier = NaiveBayesClassifier.train(self.pos + self.neg)  # Training
        return classifier.classify(self._features(headline))

    # categorize headline
    def headline_category(self, headline, sentiment):
        """Print the upper-cased headline, its sentiment and the authors.

        Authors come from the article stored by extract_headline(); when no
        article has been parsed yet the author list is printed empty.
        """
        article = getattr(self, "article", None)
        authors = article.authors if article is not None else []

        print("\n HEADLINE :", headline.upper())
        print("SENTIMENT :", sentiment)
        print("AUTHOR(S) :", *authors, '\n ')

    # main of class
    def main(self):
        """Run the pipeline: extract the headline, classify it, print the result."""
        hdln = self.extract_headline()
        if hdln is None:
            # extract_headline() has already reported the failure.
            return

        # Train exactly once; every call re-reads both CSV files.
        sntmnt = self.train(hdln)
        self.headline_category(hdln, sntmnt)
63+
64+
# Script entry point: build the analyser (which prompts for a URL) and run it.
if __name__ == '__main__':
    analyser = title()
    analyser.main()
67+
0 commit comments