Skip to content

Commit b6f5f41

Browse files
authored
Merge pull request #14 from asiffarhankhan/master
modified functionalities
2 parents 01f875f + cd6271d commit b6f5f41

File tree

7 files changed

+163
-141
lines changed

7 files changed

+163
-141
lines changed

Idea/1. Headline.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# 1. Headline
1+
# 1. Headline
2+
~Not Up to date~
23

34
### Ideas for executing a headline rating
45

ReadMe.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,7 @@ The program aims to eliminate the prevailing dominance of Fake News all around t
1818

1919
* AlphaPhiKappa
2020
* riseandshine0
21+
* mphirke
22+
* mandjevant
2123
* webdotorg
24+

modules/headline.py

Lines changed: 55 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,67 @@
1-
import urllib.request
2-
from textblob.classifiers import NaiveBayesClassifier
3-
from textblob import TextBlob
41
import newspaper
2+
import nltk
3+
from nltk.classify import NaiveBayesClassifier
4+
from nltk.classify.util import accuracy
5+
import urllib.request
56
import sys
67

7-
class title:
8-
9-
#Initialisations
10-
def __init__(self):
11-
self.news_url="https://edition.cnn.com/2019/08/25/politics/trump-g7-boris-johnson-emmanuel-macron/index.html"
12-
try:
13-
news_page=urllib.request.urlopen(self.news_url)
14-
15-
except urllib.error.URLError:
16-
print("\nCONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet")
17-
sys.exit()
18-
198

20-
def extract_headline(self):
21-
article = newspaper.Article(self.news_url)
22-
article.download()
23-
article.parse()
24-
return article.title.strip()
9+
class title:
10+
#Initialisations
11+
def __init__(self):
    """Prompt for an article URL and check that it can be opened.

    Sets ``news_url`` (the text typed by the user, surrounding whitespace
    removed), the empty ``pos`` / ``neg`` lists and ``news_page`` (the
    object returned by ``urllib.request.urlopen``).

    Exits the program through ``sys.exit()`` when the URL is malformed or
    cannot be opened.
    """
    self.news_url=input("\nEnter The URL : ").strip()
    self.pos=[] #Variable to store all positive tokens from positive_headlines.csv file
    self.neg=[] #Variable to store all negative tokens from negative_headlines.csv file

    try:
        self.news_page=urllib.request.urlopen(self.news_url)

    except ValueError:
        # urlopen raises ValueError (not URLError) when the text is not a
        # URL at all, e.g. the scheme is missing; without this branch a
        # simple typo ends in a traceback.
        print("\nURL ERROR: The text entered is not a valid URL. Please include the scheme, e.g. https://")
        sys.exit()

    except urllib.error.URLError:
        print("\nCONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet")
        sys.exit()
2522

26-
#Adding Training Data
27-
def train_data(self, headline):
28-
try:
29-
with open('training_data.csv','r') as td:
30-
cl=NaiveBayesClassifier(td,format='csv')
31-
sentiment=cl.classify(headline)
32-
return sentiment
23+
# extract headline
24+
def extract_headline(self):
    """Download and parse the article at ``self.news_url``; return its title.

    The parsed article is kept on ``self.article`` because
    ``headline_category`` reads ``self.article.authors``.

    Returns:
        The article title with surrounding whitespace removed, or ``None``
        (after printing an error message) when newspaper raises
        ``ArticleException``.
    """
    try:
        article = newspaper.Article(self.news_url)
        article.download()
        article.parse()
    except newspaper.article.ArticleException: #List possible errors in case of any exception
        print("\nCONNECTION/URL ERROR: Article could not be retrieved.")
        return None

    # Only publish the article once it has been parsed successfully.
    self.article = article
    return article.title.strip()
3332

34-
except:
35-
print("\n\n Connection/Program Error")
33+
34+
#Adding Training/Testing Data
35+
def train(self,headline):
    """Train a Naive Bayes classifier on the headline files and classify ``headline``.

    ``positive_headlines.csv`` and ``negative_headlines.csv`` are read from
    the current working directory, one headline per line.  Every line is
    turned into a ``{token: True}`` feature dict labelled ``'Positive'`` or
    ``'Negative'``.

    ``self.pos`` and ``self.neg`` are rebuilt on every call rather than
    appended to, so calling ``train`` more than once does not duplicate the
    training data.

    Args:
        headline: the headline text to classify.

    Returns:
        The label returned by the classifier for ``headline``.
    """
    def featurize(text):
        # Bag-of-words presence features over the NLTK word tokens.
        return {word: True for word in nltk.word_tokenize(text)}

    def load(path, label):
        with open(path) as file:
            # Blank lines would become empty feature sets that only skew
            # the label counts, so they are skipped.
            return [[featurize(sentence), label] for sentence in file if sentence.strip()]

    self.pos = load("positive_headlines.csv", 'Positive')
    self.neg = load("negative_headlines.csv", 'Negative')

    training = self.pos + self.neg

    classifier = NaiveBayesClassifier.train(training) #Training
    sentiment = classifier.classify(featurize(headline))
    return sentiment
4349

44-
def main(self):
45-
hdln=self.extract_headline()
46-
sntmnt=self.train_data(hdln)
47-
self.headline_category(hdln,sntmnt)
50+
# categorize headline
51+
def headline_category(self,headline,sentiment):
    """Print the upper-cased headline, its sentiment and the article authors.

    Args:
        headline: headline text; printed in upper case.
        sentiment: sentiment label; printed as given.

    The authors are read from ``self.article.authors``.  When the instance
    has no ``article`` attribute (or it has no authors) the author line is
    printed empty instead of raising ``AttributeError``.
    """
    article = getattr(self, "article", None)
    authors = getattr(article, "authors", None) or []

    print("\nHEADLINE :",headline.upper())
    print("SENTIMENT :",sentiment)
    print("AUTHOR(S) :",*authors,'\n')
4855

56+
57+
# main of class
58+
def main(self):
    """Run the pipeline: extract the headline, classify it, print the result.

    ``extract_headline`` prints its own error message and yields ``None``
    when the article cannot be retrieved; in that case there is nothing to
    classify and the method returns early.  The classifier is trained once
    per run.
    """
    hdln=self.extract_headline()
    if hdln is None:
        return

    sntmnt=self.train(hdln)
    self.headline_category(hdln,sntmnt)
63+
64+
4965
if __name__=='__main__':
50-
do_ya_thing=title()
51-
do_ya_thing.main()
66+
title().main()
67+

modules/negative_headlines.csv

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
aba decides against community broadcasting licence
2+
act fire witnesses must be aware of defamation
3+
air nz staff in aust strike for pay rise
4+
air nz strike to affect australian travellers
5+
aussie qualifier stosur wastes four memphis match
6+
australia is locked into war timetable opp
7+
blizzard buries united states in bills
8+
brigadier dismisses reports troops harassed in
9+
british combat troops arriving daily in kuwait
10+
bryant leads lakers to double overtime win
11+
bushfire victims urged to see centrelink
12+
businesses should prepare for terrorist attacks
13+
carews freak goal leaves roma in ruins
14+
cemeteries miss out on funds
15+
council chief executive fails to secure position
16+
crean tells alp leadership critics to shut up
17+
dargo fire threat expected to rise
18+
death toll continues to climb in south korean subway
19+
direct anger at govt not soldiers crean urges
20+
dispute over at smithton vegetable processing plant
21+
dying korean subway passengers phoned for help
22+
firefighters contain acid spill
23+
four injured in head on highway crash
24+
gilchrist backs rest policy
25+
girl injured in head on highway crash
26+
govt is to blame for ethanols unpopularity opp
27+
griffiths under fire over project knock back
28+
hacker gains access to eight million credit cards
29+
hanson should go back where she came from nsw mp
30+
harrington raring to go after break
31+
investigation underway into elster creek spill
32+
iraqs neighbours plead for continued un inspections
33+
israeli forces push into gaza strip
34+
kelly not surprised ethanol confidence low
35+
korean subway fire 314 still missing
36+
low demand forces air service cuts
37+
man with knife hijacks light plane
38+
more than 40 pc of young men drink alcohol at
39+
more water restrictions predicted for northern tas
40+
petrol bombs and water cannons mark violent escalation in hong kong protests
41+
imran khan addresses pakistan on kashmir threatens nuclear war once again
42+
FIR against NCP leader Ajit Pawar 69 others in Maharashtra co-op bank scam case

modules/positive_headlines.csv

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
ag calls for infrastructure protection summit
2+
ambitious olsson wins triple jump
3+
antic delighted with record breaking barca
4+
aust addresses un security council over iraq
5+
australia to contribute 10 million in aid to iraq
6+
barca take record as robson celebrates birthday in
7+
bathhouse plans move ahead
8+
big hopes for launceston cycling championship
9+
big plan to boost paroo water supplies
10+
commonwealth bank cuts fixed home loan rates
11+
community urged to help homeless youth
12+
councillor to contest wollongong as independent
13+
council moves to protect tas heritage garden
14+
council welcomes ambulance levy decision
15+
council welcomes insurance breakthrough
16+
dems hold plebiscite over iraqi conflict
17+
epa still trying to recover chemical clean up costs
18+
freedom records net profit for third successive
19+
funds allocated for domestic violence victims
20+
funds allocated for youth at risk
21+
funds announced for bridge work
22+
funds to go to cadell upgrade
23+
funds to help restore cossack
24+
golf club feeling smoking ban impact
25+
greens offer police station alternative
26+
hanson is grossly naive over nsw issues costa
27+
health minister backs organ and tissue storage
28+
heavy metal de posits survey nearing end
29+
investigations underway into death toll of korean
30+
iraq to pay for own rebuilding white house
31+
irish man arrested over omagh bombing
32+
irrigators vote over river management
33+
jury to consider verdict in murder case
34+
juvenile sex offenders unlikely to reoffend as
35+
last minute call hands alinghi big lead
36+
man arrested after central qld hijack attempt
37+
man charged over cooma murder
38+
man fined after aboriginal tent embassy raid
39+
man jailed over keno fraud
40+
massive drug crop discovered in western nsw
41+
mayor warns landfill protesters
42+
meeting to consider tick clearance costs
43+
meeting to focus on broken hill water woes
44+
moderate lift in wages growth
45+
Chandrayaan-2 maps lunar surface takes striking photos of craters on Moon

modules/training_data.csv

Lines changed: 0 additions & 101 deletions
This file was deleted.

test/test.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
def train_classifier(self,headline,*,label=None):
    """Append ``headline`` to the positive or negative training file.

    Args:
        headline: headline text to store; it is collapsed onto one line.
        label: ``'p'`` or ``'n'`` (case and surrounding whitespace are
            ignored).  When ``None`` the user is prompted for it.

    The headline is written to ``positive_headlines.csv`` or
    ``negative_headlines.csv`` in the current working directory, always as
    exactly one line.  Any other label prints "Incorrect key pressed!" and
    writes nothing.
    """
    import os

    if label is None:
        label=input("\nIf you think the output was incorrect, Please re-label the headline's sentiment to train the classifier\n"
                    "& help improve future predictions [p/n]: ")

    targets={'p': 'positive_headlines.csv', 'n': 'negative_headlines.csv'}
    path=targets.get(label.strip().lower())
    if path is None:
        print("Incorrect key pressed!")
        return

    # One headline per line: embedded newlines would create extra examples.
    text=" ".join(headline.split())
    if not text:
        return

    # Add a separating newline only when the file does not already end with
    # one, so no blank line is left between entries.
    needs_newline=False
    if os.path.exists(path) and os.path.getsize(path) > 0:
        with open(path,'rb') as existing:
            existing.seek(-1, os.SEEK_END)
            needs_newline = existing.read(1) != b'\n'

    with open(path,'a') as td:
        td.write(('\n' if needs_newline else '') + text + '\n')
15+
16+
self.train_classifier(hdln)

0 commit comments

Comments
 (0)