From b46649f8cdff8dc7833d6e67deb210186d6390ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?=
Date: Tue, 5 Jan 2016 18:04:03 -0500
Subject: [PATCH 1/2] SNI support using Python requests for .url

without this, SNI-enabled sites, which are becoming more and more
popular, are not displayed by the URL plugin

a good site to test with is: https://sni.velox.ch/

the requests API is similar enough to the `web.get` API to replace it,
but that is left to another pull request, as other plugins may not
require SNI support because they probably don't encounter the same
variety of sites as `.url`
---
 requirements.txt     |  1 +
 sopel/modules/url.py | 37 +++++++++++--------------------------
 2 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index b3856aba06..d0f3a10bf3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pytz
 praw
 pyenchant
 pygeoip
+requests
diff --git a/sopel/modules/url.py b/sopel/modules/url.py
index e18f84cb6a..58342b7df1 100644
--- a/sopel/modules/url.py
+++ b/sopel/modules/url.py
@@ -8,10 +8,12 @@
 from __future__ import unicode_literals, absolute_import, print_function, division

 import re
+from contextlib import closing

 from sopel import web, tools
 from sopel.module import commands, rule, example
 from sopel.config.types import ValidatedAttribute, StaticSection
+import requests

 url_finder = None
 # These are used to clean up the title tag before actually parsing it. Not the
@@ -150,14 +152,6 @@ def process_urls(bot, trigger, urls):
                 pass
             # First, check that the URL we got doesn't match
             matched = check_callbacks(bot, trigger, url, False)
-            if matched:
-                continue
-            # Then see if it redirects anywhere
-            new_url = follow_redirects(url)
-            if not new_url:
-                continue
-            # Then see if the final URL matches anything
-            matched = check_callbacks(bot, trigger, new_url, new_url != url)
             if matched:
                 continue
             # Finally, actually show the URL
@@ -167,20 +161,6 @@ def process_urls(bot, trigger, urls):
     return results


-def follow_redirects(url):
-    """
-    Follow HTTP 3xx redirects, and return the actual URL. Return None if
-    there's a problem.
-    """
-    try:
-        connection = web.get_urllib_object(url, 60)
-        url = connection.geturl() or url
-        connection.close()
-    except:
-        return None
-    return url
-
-
 def check_callbacks(bot, trigger, url, run=True):
     """
     Check the given URL against the callbacks list. If it matches, and ``run``
@@ -201,10 +181,15 @@ def check_callbacks(bot, trigger, url, run=True):

 def find_title(url):
     """Return the title for the given URL."""
-    try:
-        content, headers = web.get(url, return_headers=True, limit_bytes=max_bytes)
-    except UnicodeDecodeError:
-        return  # Fail silently when data can't be decoded
+    with closing(requests.get(url, stream=True)) as response:
+        try:
+            content = ''
+            for line in response.iter_lines(decode_unicode=True):
+                content += line
+                if '</title>' in content or len(content) > max_bytes:
+                    break
+        except UnicodeDecodeError:
+            return  # Fail silently when data can't be decoded

     # Some cleanup that I don't really grok, but was in the original, so
     # we'll keep it (with the compiled regexes made global) for now.

From a4ee91f46c5f4850d974f5be5193e05e1675683b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?=
Date: Tue, 5 Jan 2016 18:27:35 -0500
Subject: [PATCH 2/2] properly close partially read request

this is because the closing() wrapper doesn't seem to be supported in
all cases. at least in requests 2.8, the response.close() call actually
works, so we'll use that.

note that it fails in requests 2.4.3 (Debian jessie/stable)
---
 sopel/modules/url.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/sopel/modules/url.py b/sopel/modules/url.py
index 58342b7df1..0d7aac5de1 100644
--- a/sopel/modules/url.py
+++ b/sopel/modules/url.py
@@ -181,15 +181,18 @@ def check_callbacks(bot, trigger, url, run=True):

 def find_title(url):
     """Return the title for the given URL."""
-    with closing(requests.get(url, stream=True)) as response:
-        try:
-            content = ''
-            for line in response.iter_lines(decode_unicode=True):
-                content += line
-                if '</title>' in content or len(content) > max_bytes:
-                    break
-        except UnicodeDecodeError:
-            return  # Fail silently when data can't be decoded
+    response = requests.get(url, stream=True)
+    try:
+        content = ''
+        for line in response.iter_lines(decode_unicode=True):
+            content += line
+            if '</title>' in content or len(content) > max_bytes:
+                break
+    except UnicodeDecodeError:
+        return  # Fail silently when data can't be decoded
+    finally:
+        # Need to close the connection because we have not read all the data
+        response.close()

     # Some cleanup that I don't really grok, but was in the original, so
     # we'll keep it (with the compiled regexes made global) for now.
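
For reference, here is a minimal self-contained sketch of the find_title()
flow the two patches converge on: a streaming requests fetch that stops at
</title> or after max_bytes, and always closes the partially read response.
The regex-based title extraction, the max_bytes value, and the __main__
harness are illustrative stand-ins (sopel's actual cleanup logic lives in
url.py); they are not part of the patches.

    # -*- coding: utf-8 -*-
    import re

    import requests

    max_bytes = 655360  # illustrative cap; sopel configures its own limit


    def find_title(url):
        """Return the title for the given URL, or None on failure."""
        response = requests.get(url, stream=True)
        try:
            content = ''
            for line in response.iter_lines(decode_unicode=True):
                content += line
                # Stop as soon as the title is complete, or give up once
                # we've buffered more than max_bytes of the page.
                if '</title>' in content or len(content) > max_bytes:
                    break
        except UnicodeDecodeError:
            return  # Fail silently when data can't be decoded
        finally:
            # Close the connection explicitly: the body was only
            # partially read, so the socket would otherwise linger.
            response.close()
        match = re.search(r'<title[^>]*>(.*?)</title>', content,
                          re.IGNORECASE | re.DOTALL)
        if match:
            return match.group(1).strip()


    if __name__ == '__main__':
        # SNI-only test site mentioned in the first commit message
        print(find_title('https://sni.velox.ch/'))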