Skip to content

Commit

Permalink
Merge pull request #26 from MLH-Fellowship/staging
Browse files Browse the repository at this point in the history
Staging
  • Loading branch information
ramanaditya authored Oct 12, 2020
2 parents 760a1b3 + 5c56bc8 commit d3eac80
Show file tree
Hide file tree
Showing 76 changed files with 18,593 additions and 7 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/main_twitstat.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
# More GitHub Actions for Azure: https://github.com/Azure/actions

name: Build and deploy Python app to Azure Web App - twitstat

on:
  push:
    branches:
      - main

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      # NOTE(review): @master is a mutable ref; pin to a tagged release
      # (e.g. actions/checkout@v2) for reproducible builds — confirm with repo owners.
      - uses: actions/checkout@master

      - name: Set up Python version
        uses: actions/setup-python@v1
        with:
          python-version: '3.8'

      # Oryx-based build so the app is packaged the way App Service expects.
      - name: Build using AppService-Build
        uses: azure/appservice-build@v2
        with:
          platform: python
          platform-version: '3.8'

      # Deploys to the production slot using the publish profile stored in secrets.
      - name: 'Deploy to Azure Web App'
        uses: azure/webapps-deploy@v2
        with:
          app-name: 'twitstat'
          slot-name: 'production'
          publish-profile: ${{ secrets.AzureAppService_PublishProfile_f35043737dd4487e9131f39d0ff4e657 }}
44 changes: 44 additions & 0 deletions .github/workflows/staging_twitstat(staging).yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
# More GitHub Actions for Azure: https://github.com/Azure/actions

name: Build and deploy Python app to Azure Web App - twitstat(staging)

on:
  push:
    branches:
      - staging

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      # NOTE(review): @master is a mutable ref; pin to a tagged release
      # (e.g. actions/checkout@v2) for reproducible builds — confirm with repo owners.
      - uses: actions/checkout@master

      - name: Set up Python version
        uses: actions/setup-python@v1
        with:
          python-version: '3.8'

      # Oryx-based build so the app is packaged the way App Service expects.
      - name: Build using AppService-Build
        uses: azure/appservice-build@v2
        with:
          platform: python
          platform-version: '3.8'

      # FIX: previously the venv was created and `source`-activated in a
      # separate step from the pip install. Every `run` step starts a fresh
      # shell, so the activation never applied and dependencies were
      # installed into the system Python. Create, activate and install in
      # a single step so the venv is actually used.
      - name: Install dependencies
        run: |
          python3 -m venv venv
          source venv/bin/activate
          python -m pip install --upgrade pip
          pip install -r requirements/production.txt

      # Deploys to the staging slot using the publish profile stored in secrets.
      - name: 'Deploy to Azure Web App'
        uses: azure/webapps-deploy@v2
        with:
          app-name: 'twitstat'
          slot-name: 'staging'
          publish-profile: ${{ secrets.AzureAppService_PublishProfile_6d60a0be539140669ccdea72aea4def0 }}
2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion .idea/twitstat.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ Contribution Guidelines

* Please read our `Code of Conduct <./CODE_OF_CONDUCT.md>`__.

Contributors
-------------

Made with :heart: by `Aditya Raman <https://github.com/ramanaditya>`_ and `Garima Singh <https://github.com/grimmmyshini>`_!

License
--------

Expand Down
44 changes: 39 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,42 @@
from flask import Flask
import sys

app = Flask(__name__)
import pandas as pd
from flask import Flask, jsonify

from apps.process_data import ProcessData, Results
from apps.tweets import Twitter
from loguru import logger
from flask import render_template

# NOTE(review): removed-side of the unified diff — this placeholder route was
# deleted in this commit and replaced by analyze_tweets(), which serves the
# dashboard template for "/".
@app.route("/")
def hello_world():
    return "Hello, World!"
# Application-wide loguru sink: rotating file log with colorized output.
# backtrace/diagnose give full, annotated tracebacks on errors.
logger.add(
    "logs/twitstat.log",
    colorize=True,
    format="<green>{time}</green> <level>{message}</level>",
    rotation="50 MB",
    backtrace=True,
    diagnose=True,
)

# Flask application: static assets served from ./static at the URL root,
# Jinja templates loaded from ./templates.
app = Flask(
    __name__,
    static_url_path='',
    static_folder='./static',
    template_folder='./templates',
)


@app.route("/", methods=["GET"])
def analyze_tweets():
    """Serve the dashboard page.

    The tweet-analysis pipeline (fetch top trends via ``Twitter``, build a
    DataFrame of trending tweets, cluster with ``ProcessData.cluster`` and
    summarize via ``Results.get_result``) is currently disabled; this route
    only renders the static template.

    TODO(review): re-enable the pipeline and expose its JSON result
    (clusters count + per-cluster representative rows) alongside the page.
    The removed draft also had a typo to avoid reintroducing:
    "cluserts_count" -> "clusters_count".
    """
    return render_template('index.html')
40 changes: 40 additions & 0 deletions apps/clean_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import re
import string


class CleanData:
    """Helpers for normalizing raw tweet text before vectorization."""

    # Emoji / pictograph codepoint ranges. Compiled once at class-definition
    # time instead of on every de_emojify() call.
    _EMOJI_RE = re.compile(
        pattern="["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "\U00002500-\U00002BEF"  # chinese char
        "\U00002702-\U000027B0"
        "\U00002702-\U000027B0"
        "\U000024C2-\U0001F251"
        "\U0001f926-\U0001f937"
        "\U00010000-\U0010ffff"
        "\u2640-\u2642"
        "\u2600-\u2B55"
        "\u200d"
        "\u23cf"
        "\u23e9"
        "\u231a"
        "\ufe0f"  # dingbats
        "\u3030"
        "]+",
        flags=re.UNICODE,
    )

    # Links: "http" followed by any run of non-whitespace.
    _URL_RE = re.compile(r"http\S+")

    # Translation table deleting all ASCII punctuation, built once.
    _PUNCT_TABLE = str.maketrans("", "", string.punctuation)

    def de_emojify(self, tweet):
        """Return *tweet* with emoticons/pictographs removed."""
        return self._EMOJI_RE.sub(r"", tweet)

    def remove_punctuation(self, tweet):
        """Lower-case *tweet* and strip newlines, tabs, links and punctuation."""
        tweet = tweet.replace("\n", "")
        tweet = tweet.replace("\t", "")
        # BUG FIX: the previous version discarded re.sub()'s return value
        # (strings are immutable), so links were never actually removed.
        tweet = self._URL_RE.sub("", tweet)
        return tweet.lower().translate(self._PUNCT_TABLE)
74 changes: 74 additions & 0 deletions apps/process_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import logging

import nltk
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob

from apps.clean_data import CleanData

try:
from nltk import word_tokenize, sent_tokenize
except ImportError:
nltk.download("punkt")
from nltk import word_tokenize, sent_tokenize


class ProcessData:
    """Tokenization, sentiment analysis and DBSCAN clustering for tweets."""

    def __init__(self):
        # Stemmer reused by tokenize(); text cleaner shared by cluster().
        self.porter_stemmer = nltk.PorterStemmer()
        self.clean_data = CleanData()

    def tokenize(self, tweet):
        """Stem and tokenize *tweet*; used as the custom TF-IDF tokenizer."""
        return [
            self.porter_stemmer.stem(token)
            for token in nltk.word_tokenize(tweet)
        ]

    def analyse_sentiment(self, tweet):
        """Classify *tweet* polarity as 'positive', 'neutral' or 'negative'."""
        polarity = TextBlob(tweet).sentiment.polarity
        if polarity > 0:
            return "positive"
        if polarity == 0:
            return "neutral"
        return "negative"

    def cluster(self, esp, df):
        """Cluster the tweets in *df* using DBSCAN.

        Parameters:
            esp: DBSCAN neighbourhood radius. (Parameter name kept for
                backward compatibility — it is DBSCAN's ``eps``.)
            df: DataFrame with a ``tweets`` column of raw tweet text.

        Returns:
            (df, n): the same DataFrame with ``tweet_clean`` and
            ``clusters`` columns added, and the number of unique cluster
            labels (noise label -1 counts as one).
        """
        df["tweet_clean"] = df["tweets"].apply(
            lambda y: self.clean_data.remove_punctuation(y)
        )
        df["tweet_clean"] = df["tweet_clean"].apply(
            lambda y: self.clean_data.de_emojify(y)
        )

        vectorizer = TfidfVectorizer(
            tokenizer=self.tokenize, stop_words="english", min_df=1
        )
        x = vectorizer.fit_transform(df.loc[:, "tweet_clean"])

        # FIX: pass eps by keyword — positional arguments to scikit-learn
        # estimators are deprecated (removed in modern releases).
        db = DBSCAN(eps=esp, min_samples=20).fit(x)

        df["clusters"] = db.labels_
        # Lazy %-formatting: message is only built if INFO is enabled.
        logging.info(
            "Number of unique clusters generated: %s", df.clusters.nunique()
        )

        return df, df.clusters.nunique()


class Results:
    """Summarize a clustered tweet DataFrame."""

    def __init__(self, df):
        """Store the clustered DataFrame and pre-compute cluster sizes."""
        self.df = df
        # Series mapping each cluster label to the number of tweets in it.
        self.clusters_count = df.clusters.value_counts()

    def get_result(self):
        """Return one representative row per cluster, tagged with sentiment.

        Returns:
            (representatives, clusters_count): a DataFrame holding the
            per-cluster maximum row with an added ``sentiment`` column,
            and the pre-computed cluster-size Series.
        """
        representatives = self.df.groupby(["clusters"]).max().reset_index()
        sentiment_of = ProcessData().analyse_sentiment
        representatives["sentiment"] = representatives["tweet_clean"].apply(
            sentiment_of
        )
        return representatives, self.clusters_count
3 changes: 3 additions & 0 deletions apps/tweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def get_trending_tweets(self, find_word):
tweets_list.append(tweets)

tweet_counter += 1

if tweet_counter > 1000:
break

return tweets_list

Expand Down
7 changes: 7 additions & 0 deletions frontend/css/bootstrap.min.css

Large diffs are not rendered by default.

Loading

0 comments on commit d3eac80

Please sign in to comment.