Issue webcompat#1131 - Adds status, creation and last changed time to issue db
deepthivenkat committed Jul 20, 2016
1 parent f976bc8 commit 7131fce
Showing 7 changed files with 142 additions and 4 deletions.
1 change: 1 addition & 0 deletions config/requirements.txt
@@ -11,3 +11,4 @@ Pillow==3.1.1
requests==2.9.1
ua-parser==0.6.1
WTForms==2.1
tldextract>=1.7.5
51 changes: 51 additions & 0 deletions tests/test_webhooks.py
@@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'''Tests for helper methods in webcompat/webhooks/helpers.py.'''

import os.path
import sys
import unittest

# Add the webcompat module to the import path before importing it
sys.path.append(os.path.realpath(os.pardir))

import webcompat
from webcompat.webhooks.helpers import extract_domain_name


WWW_DOMAIN_URL = "www.net"
RESULT_WWW_DOMAIN_URL = "net"
BLOGSPOT_URL = "http://blogsofnote.blogspot.com/"
RESULT_BLOGSPOT_URL = "blogspot"
SUBSITE_URL = "https://mail.google.com"
RESULT_SUBSITE_URL = "mail.google"
SUBSITE_WWW_URL = "https://www.mail.google.com"
RESULT_SUBSITE_WWW_URL = "mail.google"
NETFLIX_URL = "https://www.netflix.co.uk"
RESULT_NETFLIX_URL = "netflix"


class TestHelpers(unittest.TestCase):
    def setUp(self):
        webcompat.app.config['TESTING'] = True
        self.app = webcompat.app.test_client()

    def tearDown(self):
        pass

    def test_extract_domain_name(self):
        '''Test for different combinations of domain names.'''
        self.assertEqual(
            extract_domain_name(WWW_DOMAIN_URL), RESULT_WWW_DOMAIN_URL)
        self.assertEqual(
            extract_domain_name(BLOGSPOT_URL),
            RESULT_BLOGSPOT_URL)
        self.assertEqual(extract_domain_name(SUBSITE_URL), RESULT_SUBSITE_URL)
        self.assertEqual(
            extract_domain_name(SUBSITE_WWW_URL),
            RESULT_SUBSITE_WWW_URL)
        self.assertEqual(extract_domain_name(NETFLIX_URL), RESULT_NETFLIX_URL)
17 changes: 17 additions & 0 deletions webcompat/api/endpoints.py
@@ -14,6 +14,7 @@
from flask import abort
from flask import Blueprint
from flask import g
from flask import jsonify
from flask import request
from flask import session

@@ -25,6 +26,7 @@
from webcompat.helpers import mockable_response
from webcompat.helpers import normalize_api_params
from webcompat.helpers import proxy_request
from webcompat.db.helpers import domain_search

api = Blueprint('api', __name__, url_prefix='/api')
JSON_MIME = 'application/json'
@@ -250,3 +252,18 @@ def get_rate_limit():
    See https://developer.github.com/v3/rate_limit/.
    '''
    return api_request('get', 'rate_limit')


@api.route('/issues/domainsearch/<domain>')
def get_same_domain_issues(domain):
    '''Endpoint to get issues with a similar domain from the server data dump.

    Returns issues whose stored domain starts with the queried domain.
    '''
    results = domain_search(domain)
    if results:
        content = json.dumps(results)
        status_code = 200
        headers_dictionary = {'Content-Type': 'application/json'}
        return (content, status_code, headers_dictionary)
    else:
        # No matching issues were found.
        abort(404)
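As a rough illustration only, the new route could be exercised with the same Flask test client the tests above use; the 'google' query is an invented example, and the entries returned mirror the columns picked out by row_to_dict() in webcompat/db/helpers.py.

import json

import webcompat

client = webcompat.app.test_client()
rv = client.get('/api/issues/domainsearch/google')
if rv.status_code == 200:
    # A JSON list of {'issue_id', 'summary', 'domain'} dictionaries.
    matches = json.loads(rv.data)
else:
    # 404: no stored issue domain starts with the queried value.
    matches = []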
13 changes: 12 additions & 1 deletion webcompat/db/__init__.py
@@ -8,6 +8,7 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column
from sqlalchemy import DateTime
from sqlalchemy import String
from sqlalchemy.orm import scoped_session
from sqlalchemy.orm import sessionmaker
@@ -37,13 +38,23 @@ class WCIssue(IssueBase):
    issue_id = Column(String(128), unique=True, primary_key=True)
    summary = Column(String(256))
    url = Column(String(1024))
    domain = Column(String(1024))
    body = Column(String(2048))
    status = Column(String(256))
    creation_time = Column(DateTime)
    last_change_time = Column(DateTime)

    def __init__(self, issue_id, summary, url, body):

    def __init__(self, issue_id, summary, url, domain, body, status,
                 creation_time, last_change_time):
        self.issue_id = issue_id
        self.summary = summary
        self.url = url
        self.domain = domain
        self.body = body
        self.status = status
        self.creation_time = creation_time
        self.last_change_time = last_change_time


IssueBase.metadata.create_all(bind=issue_engine)
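For illustration, a row using the new columns could be built like this, assuming the issue_db scoped session exported by webcompat.db; every value below is invented. datetime objects are passed because the two new columns are declared DateTime; the webhook handler further down forwards GitHub's ISO 8601 timestamp strings, which may need converting to datetime depending on the database backend.

from datetime import datetime

from webcompat.db import issue_db, WCIssue

example = WCIssue('1131', 'Example summary', 'https://example.com',
                  'example', 'Example body text', 'open',
                  datetime(2016, 7, 20, 12, 0),
                  datetime(2016, 7, 20, 12, 0))
issue_db.add(example)
issue_db.commit()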

31 changes: 31 additions & 0 deletions webcompat/db/helpers.py
@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from webcompat import app
from webcompat.db import issue_db
from webcompat.db import WCIssue


def row_to_dict(row):
    d = {}
    for column in row.__table__.columns:
        if column.name in ('issue_id', 'domain', 'summary'):
            d[column.name] = str(getattr(row, column.name))
    return d


def domain_search(search_domain):
    '''Return up to ten issues whose domain starts with search_domain.'''
    search_domain += "%"
    session = issue_db()
    query_result = (
        session.query(WCIssue)
        .filter(WCIssue.domain.like(search_domain))
        .limit(10)
        .all())
    result_dict = [row_to_dict(r) for r in query_result]
    return result_dict
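A quick sketch of calling the helper directly; it assumes the issue database already holds issues whose domain starts with 'google', and the returned values are invented.

from webcompat.db.helpers import domain_search

matches = domain_search('google')
# e.g. [{'issue_id': '248', 'summary': 'Mail layout is broken',
#        'domain': 'mail.google'}, ...]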
5 changes: 4 additions & 1 deletion webcompat/webhooks/__init__.py
@@ -39,11 +39,14 @@ def hooklistener():
        issue_body = payload.get('issue')['body']
        issue_title = payload.get('issue')['title']
        issue_number = payload.get('issue')['number']
        issue_status = payload.get('issue')['state']
        issue_creation_time = payload.get('issue')['created_at']
        issue_last_change_time = payload.get('issue')['updated_at']
        parse_and_set_label(issue_body, issue_number)
        # Setting "Needs Triage" label by default
        # to all the new issues raised
        set_label('status-needstriage', issue_number)
        dump_to_db(issue_title, issue_body, issue_number, issue_status,
                   issue_creation_time, issue_last_change_time)
        return ('gracias, amigo.', 200)
    else:
        return ('cool story, bro.', 200)
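The fields read above come from GitHub's 'issues' webhook payload; a hand-trimmed sketch of the relevant part looks roughly like this (field names follow the GitHub API, the values are invented).

payload = {
    'action': 'opened',
    'issue': {
        'number': 1131,
        'title': 'example.com - layout is broken',
        'body': '**URL**: https://example.com',
        'state': 'open',
        'created_at': '2016-07-20T12:00:00Z',
        'updated_at': '2016-07-20T12:00:00Z',
    },
}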
28 changes: 26 additions & 2 deletions webcompat/webhooks/helpers.py
@@ -7,6 +7,7 @@
import json
import re
import requests
import tldextract

from webcompat import app
from webcompat.db import issue_db
@@ -49,7 +50,30 @@ def set_label(label, issue_number):
    api_post('labels', payload, issue_number)


def dump_to_db(title, body, issue_number):
def extract_domain_name(url):
    '''Extract the domain name from a given URL.'''
    prefix_blacklist = 'www.'
    domain_blacklist = ['.google.com', '.live.com', '.yahoo.com', '.go.com']
    parts = tldextract.extract(url)
    # Handles specific cases where 'www' is the domain (www.net, www.org)
    if parts.domain == '' or parts.domain == 'www':
        return parts.suffix
    # For large hosting domains with many subdomains, the registered domain
    # alone does not say much, so the subdomain is included as well.
    elif any(domain in url for domain in domain_blacklist):
        subdomain = parts.subdomain
        if subdomain.startswith(prefix_blacklist):
            # Strip a leading 'www.' from the subdomain
            subdomain = subdomain[len(prefix_blacklist):]
        return '.'.join([subdomain, parts.domain])
    else:
        return parts.domain


def dump_to_db(title, body, issue_number, status, creation_time,
               last_change_time):
    '''Store issue details to issue_db'''
    url = extract_url(body)
    issue_db.add(WCIssue(issue_number, title, url, body))
    domain = extract_domain_name(url)
    issue_db.add(WCIssue(issue_number, title, url, domain, body, status,
                         creation_time, last_change_time))
    issue_db.commit()
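As a reading aid for extract_domain_name(), these are the pieces tldextract splits a URL into; the attribute names are tldextract's own, though exact suffix handling can vary with the version pinned in requirements.txt.

import tldextract

parts = tldextract.extract('https://www.netflix.co.uk')
# parts.subdomain == 'www', parts.domain == 'netflix', parts.suffix == 'co.uk'

parts = tldextract.extract('https://mail.google.com')
# parts.subdomain == 'mail', parts.domain == 'google', parts.suffix == 'com'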
