From d0188baacc71905b437a3b186c411b14ecff40a2 Mon Sep 17 00:00:00 2001
From: Thijs Damsma <tdamsma@gmail.com>
Date: Tue, 12 Feb 2019 09:15:28 +0100
Subject: [PATCH 1/4] Attempt to generalize _XlrdReader __init__ and move it to
 _BaseExcelReader

---
 pandas/io/excel/_base.py | 56 ++++++++++++++++++++++++++++++----------
 pandas/io/excel/_xlrd.py | 41 ++++++++---------------------
 2 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index ed5943e9a1698..d7eedf09a38a3 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1,27 +1,27 @@
 import abc
+import os
+import warnings
 from collections import OrderedDict
 from datetime import date, datetime, timedelta
-import os
+from io import BytesIO
 from textwrap import fill
-import warnings
 
 import pandas.compat as compat
 from pandas.compat import add_metaclass, range, string_types, u
-from pandas.errors import EmptyDataError
-from pandas.util._decorators import Appender, deprecate_kwarg
-
-from pandas.core.dtypes.common import (
-    is_bool, is_float, is_integer, is_list_like)
-
 from pandas.core import config
+from pandas.core.dtypes.common import (is_bool, is_float, is_integer,
+                                       is_list_like)
 from pandas.core.frame import DataFrame
-
-from pandas.io.common import _NA_VALUES, _stringify_path, _validate_header_arg
-from pandas.io.excel._util import (
-    _fill_mi_header, _get_default_writer, _maybe_convert_to_string,
-    _maybe_convert_usecols, _pop_header_name, get_writer)
+from pandas.errors import EmptyDataError
+from pandas.io.common import (_NA_VALUES, _is_url, _stringify_path, _urlopen,
+                              _validate_header_arg, get_filepath_or_buffer)
+from pandas.io.excel._util import (_fill_mi_header, _get_default_writer,
+                                   _maybe_convert_to_string,
+                                   _maybe_convert_usecols, _pop_header_name,
+                                   get_writer)
 from pandas.io.formats.printing import pprint_thing
 from pandas.io.parsers import TextParser
+from pandas.util._decorators import Appender, deprecate_kwarg
 
 _read_excel_doc = """
 Read an Excel file into a pandas DataFrame.
@@ -329,6 +329,36 @@ def read_excel(io,
 @add_metaclass(abc.ABCMeta)
 class _BaseExcelReader(object):
 
+    def __init__(self, filepath_or_buffer):
+        # If filepath_or_buffer is a url, load the data into a BytesIO
+        if _is_url(filepath_or_buffer):
+            filepath_or_buffer = BytesIO(_urlopen(filepath_or_buffer).read())
+        elif not isinstance(filepath_or_buffer,
+                            (ExcelFile, self._workbook_class)):
+            filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
+                filepath_or_buffer)
+
+        if isinstance(filepath_or_buffer, self._workbook_class):
+            self.book = filepath_or_buffer
+        elif hasattr(filepath_or_buffer, "read"):
+            # N.B. xlrd.Book has a read attribute too
+            filepath_or_buffer.seek(0)
+            self.book = self.load_workbook(filepath_or_buffer)
+        elif isinstance(filepath_or_buffer, compat.string_types):
+            self.book = self.load_workbook(filepath_or_buffer)
+        else:
+            raise ValueError('Must explicitly set engine if not passing in'
+                             ' buffer or path for io.')
+
+    @property
+    @abc.abstractmethod
+    def _workbook_class(self):
+        pass
+
+    @abc.abstractmethod
+    def load_workbook(self, filepath_or_buffer):
+        pass
+
     @property
     @abc.abstractmethod
     def sheet_names(self):
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 60f7d8f94a399..0a083be39052d 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,13 +1,10 @@
 from datetime import time
 from distutils.version import LooseVersion
-from io import UnsupportedOperation
+from io import BytesIO
 
 import numpy as np
 
-import pandas.compat as compat
 from pandas.compat import range, zip
-
-from pandas.io.common import _is_url, _urlopen, get_filepath_or_buffer
 from pandas.io.excel._base import _BaseExcelReader
 
 
@@ -32,35 +29,19 @@ def __init__(self, filepath_or_buffer):
                 raise ImportError(err_msg +
                                   ". Current version " + xlrd.__VERSION__)
 
-        from pandas.io.excel._base import ExcelFile
-        # If filepath_or_buffer is a url, want to keep the data as bytes so
-        # can't pass to get_filepath_or_buffer()
-        if _is_url(filepath_or_buffer):
-            filepath_or_buffer = _urlopen(filepath_or_buffer)
-        elif not isinstance(filepath_or_buffer, (ExcelFile, xlrd.Book)):
-            filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
-                filepath_or_buffer)
-
-        if isinstance(filepath_or_buffer, xlrd.Book):
-            self.book = filepath_or_buffer
-        elif hasattr(filepath_or_buffer, "read"):
-            # N.B. xlrd.Book has a read attribute too
-            if hasattr(filepath_or_buffer, 'seek'):
-                try:
-                    # GH 19779
-                    filepath_or_buffer.seek(0)
-                except UnsupportedOperation:
-                    # HTTPResponse does not support seek()
-                    # GH 20434
-                    pass
+        self._engine = xlrd
+        super(_XlrdReader, self).__init__(filepath_or_buffer)
+
+    @property
+    def _workbook_class(self):
+        return self._engine.Book
 
+    def load_workbook(self, filepath_or_buffer):
+        if isinstance(filepath_or_buffer, BytesIO):
             data = filepath_or_buffer.read()
-            self.book = xlrd.open_workbook(file_contents=data)
-        elif isinstance(filepath_or_buffer, compat.string_types):
-            self.book = xlrd.open_workbook(filepath_or_buffer)
+            return self._engine.open_workbook(file_contents=data)
         else:
-            raise ValueError('Must explicitly set engine if not passing in'
-                             ' buffer or path for io.')
+            return self._engine.open_workbook(filepath_or_buffer)
 
     @property
     def sheet_names(self):

From a77a4c7c953f3fcf56547debd9a685c0e2b5bd00 Mon Sep 17 00:00:00 2001
From: Thijs Damsma <tdamsma@gmail.com>
Date: Mon, 29 Apr 2019 08:48:16 +0200
Subject: [PATCH 2/4] Implement review suggestions from @WillAyd

---
 pandas/io/excel/_xlrd.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 504932c3d72e0..dbb4030d88c34 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -29,11 +29,12 @@ def __init__(self, filepath_or_buffer):
                                   ". Current version " + xlrd.__VERSION__)
 
         self._engine = xlrd
-        super(_XlrdReader, self).__init__(filepath_or_buffer)
+        super().__init__(filepath_or_buffer)
 
     @property
     def _workbook_class(self):
-        return self._engine.Book
+        from xlrd import Book
+        return Book
 
     def load_workbook(self, filepath_or_buffer):
 

From 22e24bbae8636bcd4dec360cae7124cb9c6a3d34 Mon Sep 17 00:00:00 2001
From: Thijs Damsma <tdamsma@gmail.com>
Date: Mon, 29 Apr 2019 09:54:48 +0200
Subject: [PATCH 3/4] Remove the _engine attribute altogether

---
 pandas/io/excel/_xlrd.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index dbb4030d88c34..d772ad4bfedfc 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -28,7 +28,6 @@ def __init__(self, filepath_or_buffer):
                 raise ImportError(err_msg +
                                   ". Current version " + xlrd.__VERSION__)
 
-        self._engine = xlrd
         super().__init__(filepath_or_buffer)
 
     @property
@@ -37,12 +36,12 @@ def _workbook_class(self):
         return Book
 
     def load_workbook(self, filepath_or_buffer):
-
+        from xlrd import open_workbook
         if isinstance(filepath_or_buffer, (BytesIO, BufferedReader)):
             data = filepath_or_buffer.read()
-            return self._engine.open_workbook(file_contents=data)
+            return open_workbook(file_contents=data)
         else:
-            return self._engine.open_workbook(filepath_or_buffer)
+            return open_workbook(filepath_or_buffer)
 
     @property
     def sheet_names(self):

From 903b188f481f28a171d101388d7ea313f1ad847e Mon Sep 17 00:00:00 2001
From: Thijs Damsma <tdamsma@gmail.com>
Date: Tue, 30 Apr 2019 10:04:48 +0200
Subject: [PATCH 4/4] Fix regression when reading S3 files

---
 pandas/io/excel/_xlrd.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index d772ad4bfedfc..18e751274dab9 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,6 +1,5 @@
 from datetime import time
 from distutils.version import LooseVersion
-from io import BufferedReader, BytesIO
 
 import numpy as np
 
@@ -37,7 +36,7 @@ def _workbook_class(self):
 
     def load_workbook(self, filepath_or_buffer):
         from xlrd import open_workbook
-        if isinstance(filepath_or_buffer, (BytesIO, BufferedReader)):
+        if hasattr(filepath_or_buffer, "read"):
             data = filepath_or_buffer.read()
             return open_workbook(file_contents=data)
         else: