Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mobad committed Aug 15, 2017
1 parent 9026bd8 commit 9819efb
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,5 @@ ENV/

# mypy
.mypy_cache/
*.cfg
*.db
7 changes: 7 additions & 0 deletions rfd.cfg.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[rfd]
db_path = rfd.db
emails = email@example.com
sections = /hot-deals-f9/

#emails = email1@exmaple.com, email2@emample.com
#sections = /hot-deals-f9/, /coupons-f136/
Binary file added rfd.db.sample
Binary file not shown.
1 change: 1 addition & 0 deletions rfd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CREATE TABLE rfd(url VARCHAR NOT NULL PRIMARY KEY);
96 changes: 96 additions & 0 deletions rfdalert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/python3

import sqlite3
import re
import datetime
import configparser
import os
from urllib.request import Request
from urllib.request import urlopen
from subprocess import Popen, PIPE
import dateutil.parser
from bs4 import BeautifulSoup

config = configparser.ConfigParser()
config.read(['rfd.cfg', os.path.expanduser('~/.rfd.cfg')])
config = config['rfd']

conn = sqlite3.connect(config['db_path'])
c = conn.cursor()

emails = [email.strip() for email in config['emails'].split(',')]

rfdUrl = "http://forums.redflagdeals.com"
rfdSections = [section.strip() for section in config['sections'].split(',')]

for section in rfdSections:
userAgent = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36'}
req = Request(rfdUrl+section, None, userAgent)
content = urlopen(req, timeout=10).read().decode('utf-8', 'ignore')

soup = BeautifulSoup(content)

for thread in soup.findAll("li", {"class" : "topic"}):
# Get post date.
firstPostSoup = thread.find("span", {"class":"first-post-time"})
if not firstPostSoup: continue

threadCreateDate = dateutil.parser.parse(firstPostSoup.string)
rfdtime = threadCreateDate.replace(tzinfo=dateutil.tz.gettz('America/Toronto'))
utctime = rfdtime.astimezone(dateutil.tz.tzutc())
daysOld = (datetime.datetime.utcnow().replace(tzinfo=dateutil.tz.tzutc()) - utctime).days + 1

# Get post votes.
voteSoup = thread.find("dl", {"class":"post_voting"})
if not voteSoup: continue

votesStr = re.sub('[^0-9-]', '', voteSoup['data-total'])
votes = int(votesStr if votesStr != '' else 0)

# Get number of comments.
postSoup = thread.find("div", {"class":"posts"})
if not postSoup: continue
posts = int(re.sub('[^0-9]', '', postSoup.string))

if (votes < 5*daysOld and posts < 10*daysOld) or votes < 0: continue

# Get title.
titleSoup = thread.find("h3", {"class" : "topictitle"})
if not titleSoup: continue

title = ""
for part in titleSoup:
if part.string is not None:
title += part.string.strip() + " "
title = title.strip()

link = rfdUrl+titleSoup.find("a", {"class" : "topic_title_link"})['href']

# Check whether post is in the database.
c.execute('''SELECT COUNT(*) FROM rfd WHERE url=?''', (link,))
if c.fetchone()[0] > 0: continue

print(title + " " + link + " " + str(posts) + " " + str(votes))

# Fetch post contents
threadReq = Request(link, None, userAgent)
threadContent = urlopen(threadReq, timeout=10).read()
threadSoup = BeautifulSoup(threadContent)
for elem in threadSoup.findAll(['script', 'style']):
elem.extract()

dealSoup = threadSoup.find("div", {"class":"post_content"})
if not dealSoup: continue

# Convert named links to destination link.
for a in dealSoup.findAll('a'):
a.string = a['href']

# TODO: Use HTML emails so images and links work.
p = Popen(["mutt", "-s", str(posts) + "|" + str(votes) + ": " + title, ' '.join(emails)], stdin=PIPE)
p.communicate(input=bytes(link+"\n"+dealSoup.text, 'UTF-8'))
if p.returncode != 0: continue
print(p.returncode)
c.execute('''INSERT INTO rfd VALUES (?)''', (link,))
conn.commit()
conn.close()

0 comments on commit 9819efb

Please sign in to comment.