Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNI support using Python requests for .url #988

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pytz
praw
pyenchant
pygeoip
requests
34 changes: 11 additions & 23 deletions sopel/modules/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
from __future__ import unicode_literals, absolute_import, print_function, division

import re
from contextlib import closing
from sopel import web, tools
from sopel.module import commands, rule, example
from sopel.config.types import ValidatedAttribute, StaticSection

import requests

url_finder = None
# These are used to clean up the title tag before actually parsing it. Not the
Expand Down Expand Up @@ -150,14 +152,6 @@ def process_urls(bot, trigger, urls):
pass
# First, check that the URL we got doesn't match
matched = check_callbacks(bot, trigger, url, False)
if matched:
continue
# Then see if it redirects anywhere
new_url = follow_redirects(url)
if not new_url:
continue
# Then see if the final URL matches anything
matched = check_callbacks(bot, trigger, new_url, new_url != url)
if matched:
continue
# Finally, actually show the URL
Expand All @@ -167,20 +161,6 @@ def process_urls(bot, trigger, urls):
return results


def follow_redirects(url):
    """
    Follow HTTP 3xx redirects, and return the actual URL.

    :param url: the URL to resolve
    :return: the final URL after any redirects, or ``None`` if the
        request fails for any reason
    """
    connection = None
    try:
        connection = web.get_urllib_object(url, 60)
        # geturl() may return a falsy value; keep the original URL then.
        url = connection.geturl() or url
    except Exception:
        # Narrowed from a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not silently swallowed.
        return None
    finally:
        # Close in ``finally`` so the socket is released even when
        # geturl() raises (the original leaked it on that path).
        if connection is not None:
            connection.close()
    return url


def check_callbacks(bot, trigger, url, run=True):
"""
Check the given URL against the callbacks list. If it matches, and ``run``
Expand All @@ -201,10 +181,18 @@ def check_callbacks(bot, trigger, url, run=True):

def find_title(url):
"""Return the title for the given URL."""
response = requests.get(url, stream=True)
try:
content, headers = web.get(url, return_headers=True, limit_bytes=max_bytes)
content = ''
for line in response.iter_lines(decode_unicode=True):
content += line
if '</title>' in content or len(content) > max_bytes:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't this catch such a case? wouldn't len(content) > max_bytes?

break
except UnicodeDecodeError:
return # Fail silently when data can't be decoded
finally:
# need to close the connection because we have not read all the data
response.close()

# Some cleanup that I don't really grok, but was in the original, so
# we'll keep it (with the compiled regexes made global) for now.
Expand Down