From 4283479bcd356f0b97d7ba5476690bec16ae20de Mon Sep 17 00:00:00 2001
From: Hafsa25 <46001323+Hafsa25@users.noreply.github.com>
Date: Thu, 14 Mar 2019 23:23:41 +0500
Subject: [PATCH] sarcasm_detection

updated file
---
 sarcasm_detection | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 sarcasm_detection

diff --git a/sarcasm_detection b/sarcasm_detection
new file mode 100644
index 0000000..adf5e4d
--- /dev/null
+++ b/sarcasm_detection
@@ -0,0 +1,36 @@
+"""Turn the headlines of Sarcasm_Headlines_Dataset.json into a term-count matrix."""
+import re
+
+import nltk
+import pandas as pd
+from nltk.corpus import stopwords
+from nltk.stem.porter import PorterStemmer
+from sklearn.feature_extraction.text import CountVectorizer
+
+nltk.download('stopwords')  # needed once; comment out if the stopword list is already downloaded
+
+path = "C:/News Headline/Sarcasm_Headlines_Dataset.json"
+try:  # only the load is guarded, so later failures are not misreported as path errors
+    dataset = pd.read_json(path, lines=True)
+except (OSError, ValueError) as err:  # missing/unreadable file, or content that is not JSON lines
+    print("the file path i does'nt contain a json file")
+    print("check the path.")
+    raise SystemExit(1) from err  # nothing below can run without the dataset
+
+dataset = dataset.drop("article_link", axis=1)
+X = dataset.headline
+y = dataset.is_sarcastic
+# Loop invariants: built once here instead of once per word / per headline.
+stop_words = set(stopwords.words('english'))
+ps = PorterStemmer()
+corpus = []
+print("The process will take time be patient!")
+for i, headline in enumerate(X):
+    # Keep letters only, lowercase, split into words, then drop stopwords and stem the rest.
+    words = re.sub(r'[^a-zA-Z]', ' ', headline).lower().split()
+    corpus.append(' '.join(ps.stem(word) for word in words if word not in stop_words))
+    print(i)
+
+cv = CountVectorizer()
+vector = cv.fit_transform(corpus)
+vector.toarray()  # NOTE(review): the dense copy is discarded here; bind it to a name if it is needed