-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathNews Article Summary.py
61 lines (44 loc) · 1.18 KB
/
News Article Summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# coding: utf-8
# # Create a summary of a news article
# Before we start, we need to install the following packages and corpora:
# 1. pip3 install newspaper3k
# 2. pip install -U textblob
# 3. pip install requests
# 4. sudo pip install -U nltk
# 5. python -m textblob.download_corpora
# 6. In Python: import nltk; nltk.download('punkt')
#
# In[5]:
import numpy as np
import pandas as pd
import pip
import newspaper
from newspaper import Article
from textblob import TextBlob
import nltk
from nltk import word_tokenize,sent_tokenize
# In[6]:
# Source article to summarise.
url = 'https://www.nytimes.com/2018/11/12/obituaries/stan-lee-dead.html'

# Fetch the page, parse it, then run the library's NLP step.
article = Article(url)
article.download()
article.parse()
article.nlp()

# Notebook-style inspections: as bare expressions they display a value in a
# notebook cell but print nothing when this file is run as a script.
article.title
article.keywords
article.summary

# Wrap the article body in a TextBlob for the sentence/tag processing below.
blob2 = TextBlob(article.text)
# In[7]:
# Build one string per sentence of the article, containing only that
# sentence's lemmatized nouns and verbs, lower-cased and space-separated.
wordlist = pd.DataFrame()
ssList = []
for t in blob2.sentences:
    # Keep words whose part-of-speech tag is a noun (NN*) or verb (VB*) tag.
    ww = [
        word.lemmatize()
        for word, tag in t.tags
        if tag in ('NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
    ]
    # A sentence with no matching words contributes an empty string, so
    # ssList stays aligned one-to-one with blob2.sentences.
    ss = ' '.join(ww)
    ssList.append(ss.lower())
# In[8]:
# Turn the per-sentence strings into rows of the result frame.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement for appending rows with a fresh 0..n-1 index.
wordlist = pd.concat([wordlist, pd.DataFrame(ssList)], ignore_index=True)

# Notebook-style inspections; no output when run as a plain script.
wordlist
len(blob2.sentences)

# Persist the table next to the script.
wordlist.to_csv('StanLeeSummary.csv')