-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #26 from MLH-Fellowship/staging
Staging
- Loading branch information
Showing
76 changed files
with
18,593 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
# More GitHub Actions for Azure: https://github.com/Azure/actions

name: Build and deploy Python app to Azure Web App - twitstat

# Runs on every push to the main branch and deploys to the production slot.
on:
  push:
    branches:
      - main

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      # Pinned to a release tag: a branch ref such as `master` can change
      # underneath the workflow without any change to this repository.
      - uses: actions/checkout@v4

      - name: Set up Python version
        uses: actions/setup-python@v1
        with:
          python-version: '3.8'

      - name: Build using AppService-Build
        uses: azure/appservice-build@v2
        with:
          platform: python
          platform-version: '3.8'

      - name: 'Deploy to Azure Web App'
        uses: azure/webapps-deploy@v2
        with:
          app-name: 'twitstat'
          slot-name: 'production'
          publish-profile: ${{ secrets.AzureAppService_PublishProfile_f35043737dd4487e9131f39d0ff4e657 }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
# More GitHub Actions for Azure: https://github.com/Azure/actions

name: Build and deploy Python app to Azure Web App - twitstat(staging)

# Runs on every push to the staging branch and deploys to the staging slot.
on:
  push:
    branches:
      - staging

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      # Pinned to a release tag: a branch ref such as `master` can change
      # underneath the workflow without any change to this repository.
      - uses: actions/checkout@v4

      - name: Set up Python version
        uses: actions/setup-python@v1
        with:
          python-version: '3.8'

      - name: Build using AppService-Build
        uses: azure/appservice-build@v2
        with:
          platform: python
          platform-version: '3.8'

      # Every `run` step starts a fresh shell, so a virtual environment
      # activated in one step is no longer active in the next. Creating,
      # activating and installing in a single step makes the requirements
      # actually land inside `venv`.
      - name: Set up virtual environment and install dependencies
        run: |
          python3 -m venv venv
          source venv/bin/activate
          python -m pip install --upgrade pip
          pip install -r requirements/production.txt

      - name: 'Deploy to Azure Web App'
        uses: azure/webapps-deploy@v2
        with:
          app-name: 'twitstat'
          slot-name: 'staging'
          publish-profile: ${{ secrets.AzureAppService_PublishProfile_6d60a0be539140669ccdea72aea4def0 }}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,42 @@ | ||
from flask import Flask | ||
import sys | ||
|
||
app = Flask(__name__) | ||
import pandas as pd | ||
from flask import Flask, jsonify | ||
|
||
from apps.process_data import ProcessData, Results | ||
from apps.tweets import Twitter | ||
from loguru import logger | ||
from flask import render_template | ||
|
||
@app.route("/")
def hello_world():
    """Return a fixed plain-text greeting for requests to the site root."""
    greeting = "Hello, World!"
    return greeting
# Register a loguru sink that writes to logs/twitstat.log, using the given
# format string and a "50 MB" rotation setting.
# NOTE(review): colorize=True is applied to a file sink and diagnose=True
# enables loguru's extended traceback output -- confirm both are wanted
# outside of local development.
logger.add(
    "logs/twitstat.log",
    format="<green>{time}</green> <level>{message}</level>",
    rotation="50 MB",
    colorize=True,
    backtrace=True,
    diagnose=True,
)

# Flask application object. Static files are looked up in ./static and exposed
# with an empty URL prefix; templates are looked up in ./templates.
app = Flask(
    __name__,
    static_url_path="",
    static_folder="./static",
    template_folder="./templates",
)
|
||
|
||
@app.route("/", methods=["GET"])
def analyze_tweets():
    """Render the ``index.html`` template for GET requests to the site root.

    The tweet-analysis pipeline is currently commented out below, so this
    view does not call Twitter, cluster anything, or build a JSON response.
    """
    # Disabled pipeline, kept for reference:
    # twitter = Twitter()
    # top_trends = twitter.get_top_trends()
    # logger.info(f"Top trends now are {top_trends}")
    # trending_tweets = twitter.get_trending_tweets(top_trends[0]["name"])
    # df = pd.DataFrame(trending_tweets)
    # esp = 1.29
    # df, clusters_count = ProcessData().cluster(esp, df)
    # res, clusters_count = Results(df).get_result()
    # logger.info(f"Clusters: {clusters_count}")
    # result = {}
    # for ind, row in res.iterrows():
    #     result[ind] = dict(row)
    # response = dict()
    # response["cluserts_count"] = clusters_count.to_json()
    # response["result"] = result
    template_name = "index.html"
    return render_template(template_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import re | ||
import string | ||
|
||
|
||
class CleanData:
    """Text-normalisation helpers applied to raw tweet strings."""

    # Compiled once at class-definition time instead of on every call.
    # NOTE: several ranges are far wider than "emoji" -- U+24C2..U+1F251 also
    # covers CJK text and U+10000..U+10FFFF covers every supplementary-plane
    # character. They are kept unchanged so cleaning results stay the same.
    _EMOJI_PATTERN = re.compile(
        pattern="["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "\U00002500-\U00002BEF"  # box drawing through misc symbols and arrows
        "\U00002702-\U000027B0"  # dingbats
        "\U00002702-\U000027B0"
        "\U000024C2-\U0001F251"
        "\U0001f926-\U0001f937"
        "\U00010000-\U0010ffff"
        "\u2640-\u2642"
        "\u2600-\u2B55"
        "\u200d"  # zero-width joiner
        "\u23cf"
        "\u23e9"
        "\u231a"
        "\ufe0f"  # variation selector-16
        "\u3030"
        "]+",
        flags=re.UNICODE,
    )

    # Anything starting with "http" up to the next whitespace character.
    _LINK_PATTERN = re.compile(r"http\S+")

    # One translation pass drops punctuation together with newlines and tabs.
    _STRIP_TABLE = str.maketrans("", "", string.punctuation + "\n\t")

    def de_emojify(self, tweet):
        """Remove emoticons from given text.

        Args:
            tweet: Text to clean.

        Returns:
            ``tweet`` with every run of characters matched by the emoji
            pattern removed.
        """
        return self._EMOJI_PATTERN.sub(r"", tweet)

    def remove_punctuation(self, tweet):
        """Remove links and other punctuation from text.

        Args:
            tweet: Text to clean.

        Returns:
            The lower-cased text with links, ASCII punctuation, newlines and
            tabs removed. Other whitespace is left as is.
        """
        # The substitution result must be assigned back, otherwise the link
        # survives and is only mangled by the punctuation pass. Links are
        # stripped first, while newlines still delimit them, so a word on the
        # following line is not swallowed into the URL.
        tweet = self._LINK_PATTERN.sub("", tweet)
        return tweet.lower().translate(self._STRIP_TABLE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import logging | ||
|
||
import nltk | ||
from sklearn.cluster import DBSCAN | ||
from sklearn.feature_extraction.text import TfidfVectorizer | ||
from textblob import TextBlob | ||
|
||
from apps.clean_data import CleanData | ||
|
||
# Importing the tokenizer functions succeeds even when the "punkt" data is
# missing; the failure only appears as a LookupError when the data is looked
# up. Probe for the resource explicitly and download it when it is absent.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")
from nltk import word_tokenize, sent_tokenize
|
||
|
||
class ProcessData:
    """Tokenising, sentiment-labelling and clustering helpers for tweets."""

    def __init__(self):
        """Create the Porter stemmer and the text cleaner used by the methods."""
        self.porter_stemmer = nltk.PorterStemmer()
        self.clean_data = CleanData()

    def tokenize(self, tweet):
        """Split ``tweet`` into word tokens and return their Porter stems.

        Passed to ``TfidfVectorizer`` as its custom tokenizer in ``cluster``.
        """
        stem = self.porter_stemmer.stem
        return [stem(token) for token in nltk.word_tokenize(tweet)]

    def analyse_sentiment(self, tweet):
        """Label ``tweet`` from its TextBlob polarity.

        Returns:
            "positive" when the polarity is above zero, "neutral" when it is
            exactly zero, and "negative" otherwise.
        """
        polarity = TextBlob(tweet).sentiment.polarity
        if polarity > 0:
            return "positive"
        if polarity == 0:
            return "neutral"
        return "negative"

    def cluster(self, esp, df):
        """Cluster the tweets in ``df`` with DBSCAN over TF-IDF features.

        Args:
            esp: Passed to ``DBSCAN`` as its first positional argument.
            df: DataFrame with a "tweets" column. It is modified in place:
                "tweet_clean" and "clusters" columns are added.

        Returns:
            A tuple ``(df, n_labels)`` where ``n_labels`` is the number of
            distinct values in the "clusters" column.
        """
        # Punctuation/link stripping first, then emoji removal.
        df["tweet_clean"] = (
            df["tweets"]
            .apply(self.clean_data.remove_punctuation)
            .apply(self.clean_data.de_emojify)
        )

        tfidf = TfidfVectorizer(
            tokenizer=self.tokenize,
            stop_words="english",
            min_df=1,
        )
        features = tfidf.fit_transform(df["tweet_clean"])

        model = DBSCAN(esp, min_samples=20)
        model.fit(features)
        df["clusters"] = model.labels_

        unique_clusters = df.clusters.nunique()
        logging.info(f"Number of unique clusters generated: {unique_clusters}")

        return df, unique_clusters
|
||
|
||
class Results:
    """Per-cluster summary of a clustered tweet DataFrame."""

    def __init__(self, df):
        """Initialize final results of the analysis.

        Args:
            df: DataFrame with a "clusters" column. ``get_result`` also
                reads a "tweet_clean" column.
        """
        self.df = df
        # Number of rows per cluster label.
        self.clusters_count = df.clusters.value_counts()

    def get_result(self):
        """Build one row per cluster and attach a sentiment label.

        Returns:
            A tuple ``(df_results, clusters_count)``: the per-cluster frame
            with an added "sentiment" column, and the per-label row counts
            computed in ``__init__``.
        """
        # NOTE(review): max() is taken per column, so a result row may combine
        # values from different tweets of the same cluster -- confirm intended.
        df_results = self.df.groupby(["clusters"]).max().reset_index()

        # Build the analyser once instead of once per row.
        analyser = ProcessData()
        df_results["sentiment"] = df_results["tweet_clean"].apply(
            analyser.analyse_sentiment
        )
        return df_results, self.clusters_count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.