Merge pull request #14 from asiffarhankhan/master

mandjevant · web-flow · commit b6f5f419f8a1 · 2019-09-01T11:30:09.000+02:00
modified functionalities
diff --git a/Idea/1. Headline.md b/Idea/1. Headline.md
@@ -1,4 +1,5 @@
-# 1. Headline
+# 1. Headline 
+~Not Up to date~
 
 ### Ideas for executing a headline rating
 
diff --git a/ReadMe.md b/ReadMe.md
@@ -18,4 +18,7 @@ The program aims to eliminate the prevailing dominance of Fake News all around t
 
 * AlphaPhiKappa
 * riseandshine0
+* mphirke
+* mandjevant
 * webdotorg
+
diff --git a/modules/headline.py b/modules/headline.py
@@ -1,51 +1,67 @@
-import urllib.request
-from textblob.classifiers import NaiveBayesClassifier
-from textblob import TextBlob
 import newspaper
+import nltk
+from nltk.classify import NaiveBayesClassifier
+from nltk.classify.util import accuracy
+import urllib.request
 import sys
 
-class title:
-
-    #Initialisations
-    def __init__(self):
-        self.news_url="https://edition.cnn.com/2019/08/25/politics/trump-g7-boris-johnson-emmanuel-macron/index.html"
-        try:
-            news_page=urllib.request.urlopen(self.news_url)
-        
-        except urllib.error.URLError:
-            print("\nCONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet")
-            sys.exit()
-
 
-    def extract_headline(self):
-        article = newspaper.Article(self.news_url)
-        article.download()
-        article.parse()
-        return article.title.strip()
+class title:
+	#Initialisations
+	def __init__(self): 
+		"""Prompt for an article URL and check that it can be opened; exit if it cannot."""
+		self.news_url=input("\nEnter The URL : ")
+		self.pos=[] #Variable to store all positive tokens from positive_headlines.csv file
+		self.neg=[] #Variable to store all negative tokens from negative_headlines.csv file
+		try:
+			self.news_page=urllib.request.urlopen(self.news_url)
+		# urlopen raises ValueError, not URLError, for a malformed URL (e.g. one typed without a scheme)
+		except (urllib.error.URLError,ValueError):
+			print("\nCONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet")
+			sys.exit()
 
-    #Adding Training Data
-    def train_data(self, headline):
-        try:
-            with open('training_data.csv','r') as td:
-                cl=NaiveBayesClassifier(td,format='csv')
-                sentiment=cl.classify(headline)
-                return sentiment
+	def extract_headline(self):
+		"""Download and parse the article; return its stripped title, or None if retrieval fails."""
+		try:
+			self.article = newspaper.Article(self.news_url) # kept on self: headline_category reads self.article.authors
+			self.article.download()
+			self.article.parse()
+			return self.article.title.strip()
+		except newspaper.article.ArticleException: #List possible errors in case of any exception
+			print("\nCONNECTION/URL ERROR: Article could not be retrieved.")
 
-        except:
-            print("\n\n Connection/Program Error")
+			
+	#Adding Training/Testing Data
+	def train(self,headline):
+		"""Train a Naive Bayes classifier on both headline CSVs and return its label for headline."""
+		def load(path,label): # one [features,label] sample per non-blank line; a blank line would add an empty sample
+			with open(path) as file:
+				return [[{word: True for word in nltk.word_tokenize(line)},label] for line in file if line.strip()]
 
+		# assigned, not appended: a repeated call must not duplicate the training data
+		self.pos=load("positive_headlines.csv",'Positive')
+		self.neg=load("negative_headlines.csv",'Negative')
 
-    def headline_category(self,headline,sentiment):
+		classifier = NaiveBayesClassifier.train(self.pos + self.neg) #Training
 
-        analyse_headline=TextBlob(headline)
-        print("\n"+"Headline:",headline,"\n")
-        print("Headline Sentiment:",sentiment,"\n\n")
+		features={word: True for word in nltk.word_tokenize(headline)}
+		return classifier.classify(features)
 
-    def main(self):
-        hdln=self.extract_headline()
-        sntmnt=self.train_data(hdln)
-        self.headline_category(hdln,sntmnt)
+	def headline_category(self,headline,sentiment):
+		"""Print the headline (upper-cased), its sentiment and, when available, the article authors."""
+		print("\nHEADLINE  :",headline.upper())
+		print("SENTIMENT :",sentiment)
+		print("AUTHOR(S) :",*getattr(getattr(self,"article",None),"authors",()),'\n') # self.article may never have been set; print no authors then
 
+		
+	def main(self):
+		"""Extract the headline, classify it once and print the result."""
+		hdln=self.extract_headline()
+		if hdln is None: # extract_headline already reported the failure; there is nothing to classify
+			return
+		sntmnt=self.train(hdln) # called once: every call re-reads both CSV files and retrains
+		self.headline_category(hdln,sntmnt)
+		
 if __name__=='__main__':
-    do_ya_thing=title()
-    do_ya_thing.main()
+	headline_tool=title() # the constructor prompts for the article URL
+	headline_tool.main()
diff --git a/modules/negative_headlines.csv b/modules/negative_headlines.csv
@@ -0,0 +1,42 @@
+aba decides against community broadcasting licence
+act fire witnesses must be aware of defamation
+air nz staff in aust strike for pay rise
+air nz strike to affect australian travellers
+aussie qualifier stosur wastes four memphis match
+australia is locked into war timetable opp
+blizzard buries united states in bills
+brigadier dismisses reports troops harassed in
+british combat troops arriving daily in kuwait
+bryant leads lakers to double overtime win
+bushfire victims urged to see centrelink
+businesses should prepare for terrorist attacks
+carews freak goal leaves roma in ruins
+cemeteries miss out on funds
+council chief executive fails to secure  position
+crean tells alp leadership critics to shut up
+dargo fire threat expected to rise
+death toll continues to climb in south korean subway
+direct anger at govt not soldiers crean urges
+dispute over at smithton vegetable processing plant
+dying korean subway passengers phoned for help
+firefighters contain acid spill
+four injured in head on highway crash
+gilchrist backs rest policy
+girl injured in head on highway crash
+govt is to blame for ethanols unpopularity opp
+griffiths under fire over project knock back
+hacker gains access to eight million credit cards
+hanson should go back where she came from nsw mp
+harrington raring to go after break
+investigation underway into elster creek spill
+iraqs neighbours plead for continued un inspections
+israeli forces push into gaza strip
+kelly not surprised ethanol confidence low
+korean subway fire 314 still missing
+low demand forces air service cuts
+man with knife hijacks light plane
+more than 40 pc of young men drink alcohol at
+more water restrictions predicted for northern tas
+petrol bombs and water cannons mark violent escalation in hong kong protests
+imran khan addresses pakistan on kashmir threatens nuclear war once again
+FIR against NCP leader Ajit Pawar 69 others in Maharashtra co-op bank scam case
diff --git a/modules/positive_headlines.csv b/modules/positive_headlines.csv
@@ -0,0 +1,45 @@
+ag calls for infrastructure protection summit
+ambitious olsson wins triple jump
+antic delighted with record breaking barca
+aust addresses un security council over iraq
+australia to contribute 10 million in aid to iraq
+barca take record as robson celebrates birthday in
+bathhouse plans move ahead
+big hopes for launceston cycling championship
+big plan to boost paroo water supplies
+commonwealth bank cuts fixed home loan rates
+community urged to help homeless youth
+councillor to contest wollongong as independent
+council moves to protect tas heritage garden
+council welcomes ambulance levy decision
+council welcomes insurance breakthrough
+dems hold plebiscite over iraqi conflict
+epa still trying to recover chemical clean up costs
+freedom records net profit for third successive
+funds allocated for domestic violence victims
+funds allocated for youth at risk
+funds announced for bridge work
+funds to go to cadell upgrade
+funds to help restore cossack
+golf club feeling smoking ban impact
+greens offer police station alternative
+hanson is grossly naive over nsw issues costa
+health minister backs organ and tissue storage
+heavy metal deposits survey nearing end
+investigations underway into death toll of korean
+iraq to pay for own rebuilding white house
+irish man arrested over omagh bombing
+irrigators vote over river management
+jury to consider verdict in murder case
+juvenile sex offenders unlikely to reoffend as
+last minute call hands alinghi big lead
+man arrested after central qld hijack attempt
+man charged over cooma murder
+man fined after aboriginal tent embassy raid
+man jailed over keno fraud
+massive drug crop discovered in western nsw
+mayor warns landfill protesters
+meeting to consider tick clearance costs
+meeting to focus on broken hill water woes
+moderate lift in wages growth
+Chandrayaan-2 maps lunar surface takes striking photos of craters on Moon
diff --git a/modules/training_data.csv b/modules/training_data.csv
diff --git a/test/test.py b/test/test.py
@@ -0,0 +1,16 @@
+def train_classifier(self,headline):
+    """Ask the user to re-label headline and append it to the matching headlines CSV.
+
+    'p' appends to positive_headlines.csv and 'n' to negative_headlines.csv (case and
+    surrounding spaces are ignored); any other answer only prints a warning.
+    """
+    a=input("""\nIf you think the output was incorrect, Please re-label the headline's sentiment to train the classifier 
+     & help improve future predictions [p/n]: """)
+    path={'p':'positive_headlines.csv','n':'negative_headlines.csv'}.get(a.strip().lower())
+    if path is None:
+        print("Incorrect key pressed!")
+        return
+    with open(path,'a') as td:
+        td.write('\n'+headline)
+# NOTE(review): `self` and `hdln` are not defined at module level here; presumably this call is meant to be moved into title.main() — confirm
+self.train_classifier(hdln)