From 8ece8cfb1dfb1870ef9a36f1a9ed8675a907b3b4 Mon Sep 17 00:00:00 2001 From: Sam Ruth Date: Sat, 24 Jan 2015 16:09:30 -0500 Subject: [PATCH 001/239] Changed uin8 to uint8 in response to issue #9266 --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 637612d5fb09d..99fb24ebf91dd 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -991,7 +991,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, try: values = lib.map_infer(values, conv_f) except ValueError: - mask = lib.ismember(values, na_values).view(np.uin8) + mask = lib.ismember(values, na_values).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) coerce_type = False From d550e64a2ebe54d2f74a625997a3c1b6543968fa Mon Sep 17 00:00:00 2001 From: Sam Ruth Date: Sat, 24 Jan 2015 18:33:44 -0500 Subject: [PATCH 002/239] Added test for GH 9266 --- pandas/io/tests/test_parsers.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 35530a7f5e07f..014ebf299f07d 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2984,6 +2984,28 @@ def test_variable_width_unicode(self): tm.assert_frame_equal(expected, read_fwf(BytesIO(test.encode('utf8')), header=None, encoding='utf8')) + def test_convert_to_nd_arrays(self): + #GH 9266 + with open('test.txt','w') as f: + f.write( + """1421302964.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 + 1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" + ) + try: + pd.read_fwf('test.txt', colspecs=[(0,17),(25,26),(33,37),(49,51),(58,62),(63,1000)], + names=['time','pri','pgn','dst','src','data'], + converters={'pgn':lambda x: int(x,16), + 'src':lambda x: int(x,16), + 'dst':lambda x: int(x,16), + 'data':lambda x: len(x.split(' '))}, + index_col='time') + except AttributeError: + self.assertIn('Error with read_fwf function.') + + + + + class TestCParserHighMemory(ParserTests, tm.TestCase): From e7d90fe7b8cab73c09e451593e9da672761cfd77 Mon Sep 17 00:00:00 2001 From: Sam Ruth Date: Mon, 26 Jan 2015 22:02:24 -0500 Subject: [PATCH 003/239] Trying again with new testing function --- pandas/io/tests/test_parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 014ebf299f07d..b52b9a472e8e4 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2988,8 +2988,8 @@ def test_convert_to_nd_arrays(self): #GH 9266 with open('test.txt','w') as f: f.write( - """1421302964.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 - 1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" + """1421302964.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 \n""" + + """1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" ) try: pd.read_fwf('test.txt', colspecs=[(0,17),(25,26),(33,37),(49,51),(58,62),(63,1000)], From 5ea14a510d3cd1dd636ad186ba2b42eb9ae52b55 Mon Sep 17 00:00:00 2001 From: Sam Ruth Date: Sun, 5 Apr 2015 20:23:04 -0400 Subject: [PATCH 004/239] Modified the test_to_nd_array test to make it clearer what was being tested. 
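The root cause in #9266 is a one-character attribute typo: NumPy has no ``np.uin8``, so the
``except ValueError`` fallback in ``_convert_to_ndarrays`` died with an ``AttributeError``
before it could build the NA mask. A minimal sketch of the failure, assuming only NumPy
(this snippet is illustrative, not part of the patch):

    import numpy as np

    mask = np.array([True, False, True])
    mask.view(np.uint8)    # works: array([1, 0, 1], dtype=uint8)
    # mask.view(np.uin8)   # AttributeError: module has no attribute 'uin8'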
--- pandas/io/tests/test_parsers.py | 35 +++++++++++++++------------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index b52b9a472e8e4..c2a7f814e17c3 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2986,25 +2986,22 @@ def test_variable_width_unicode(self): def test_convert_to_nd_arrays(self): #GH 9266 - with open('test.txt','w') as f: - f.write( - """1421302964.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 \n""" - + """1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" - ) - try: - pd.read_fwf('test.txt', colspecs=[(0,17),(25,26),(33,37),(49,51),(58,62),(63,1000)], - names=['time','pri','pgn','dst','src','data'], - converters={'pgn':lambda x: int(x,16), - 'src':lambda x: int(x,16), - 'dst':lambda x: int(x,16), - 'data':lambda x: len(x.split(' '))}, - index_col='time') - except AttributeError: - self.assertIn('Error with read_fwf function.') - - - - + with tm.ensure_clean('test.txt') as path: + with open(path,'w') as f: + f.write( + """1421302964.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 \n""" + + """1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" + ) + + result = pd.read_fwf('test.txt', colspecs=[(0,17),(25,26),(33,37),(49,51),(58,62),(63,1000)], + names=['time','pri','pgn','dst','src','data'], + converters={'pgn':lambda x: int(x,16), + 'src':lambda x: int(x,16), + 'dst':lambda x: int(x,16), + 'data':lambda x: len(x.split(' '))}, + index_col='time') + self.assertEqual(result['dst'].dtype,np.uint8) + class TestCParserHighMemory(ParserTests, tm.TestCase): From 650e39813a6fda7818f9d8e488b562de4bf6ba00 Mon Sep 17 00:00:00 2001 From: Sam Ruth Date: Sun, 5 Apr 2015 20:45:03 -0400 Subject: [PATCH 005/239] Added documentation for bug fix on issue #9266 --- doc/source/whatsnew/v0.16.1.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 05c762b91b925..160778ed9f8ca 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -80,3 +80,5 @@ Bug Fixes - Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) - Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) + +- Bug in ``ParserBase.convert_to_nd_arrays`` when called by ``DataFrame.read_fwf`` (:issue:`9266``) From 5779cbdfc19355249149d9460ab7820333bfb4d6 Mon Sep 17 00:00:00 2001 From: Sam Ruth Date: Sun, 24 May 2015 11:06:48 -0400 Subject: [PATCH 006/239] Rebased --- pandas/io/tests/test_parsers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index c2a7f814e17c3..1bf8f7fef7b6b 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -17,6 +17,7 @@ from pandas.compat import( StringIO, BytesIO, PY3, range, long, lrange, lmap, u ) + from pandas.io.common import URLError import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, From d31428afba1a86c0a2f808dc3076ffc59ca02a1b Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Wed, 1 Apr 2015 20:44:20 -0400 Subject: [PATCH 007/239] Fix zlib and blosc imports --- pandas/io/packers.py | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index b3e2e16af54c2..04321296d4646 100644 --- 
a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -65,26 +65,7 @@ # until we can pass this into our conversion functions, # this is pretty hacky compressor = None -_IMPORTS = False -_BLOSC = False -def _importers(): - # import things we need - # but make this done on a first use basis - - global _IMPORTS - if _IMPORTS: - return - - _IMPORTS = True - - global _BLOSC - import zlib - try: - import blosc - _BLOSC = True - except: - pass def to_msgpack(path_or_buf, *args, **kwargs): """ @@ -103,7 +84,6 @@ def to_msgpack(path_or_buf, *args, **kwargs): compress : type of compressor (zlib or blosc), default to None (no compression) """ - _importers() global compressor compressor = kwargs.pop('compress', None) append = kwargs.pop('append', None) @@ -146,7 +126,6 @@ def read_msgpack(path_or_buf, iterator=False, **kwargs): obj : type of object stored in file """ - _importers() path_or_buf, _ = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) @@ -232,9 +211,10 @@ def convert(values): # convert to a bytes array v = v.tostring() + import zlib return zlib.compress(v) - elif compressor == 'blosc' and _BLOSC: + elif compressor == 'blosc': # return string arrays like they are if dtype == np.object_: @@ -242,6 +222,7 @@ def convert(values): # convert to a bytes array v = v.tostring() + import blosc return blosc.compress(v, typesize=dtype.itemsize) # ndarray (on original dtype) @@ -254,18 +235,13 @@ def unconvert(values, dtype, compress=None): return np.array(values, dtype=object) if compress == 'zlib': - + import zlib values = zlib.decompress(values) return np.frombuffer(values, dtype=dtype) elif compress == 'blosc': - - if not _BLOSC: - raise Exception("cannot uncompress w/o blosc") - - # decompress + import blosc values = blosc.decompress(values) - return np.frombuffer(values, dtype=dtype) # from a string From b49d6fd60d16f7c049c78871a48a1c579320a7de Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Wed, 1 Apr 2015 22:57:25 -0400 Subject: [PATCH 008/239] Add missing keys and tests --- pandas/io/packers.py | 16 +++++++++++----- pandas/io/tests/test_packers.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 04321296d4646..75ca44fd1ef3e 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -234,6 +234,8 @@ def unconvert(values, dtype, compress=None): if dtype == np.object_: return np.array(values, dtype=object) + values = values.encode('latin1') + if compress == 'zlib': import zlib values = zlib.decompress(values) @@ -245,7 +247,7 @@ def unconvert(values, dtype, compress=None): return np.frombuffer(values, dtype=dtype) # from a string - return np.fromstring(values.encode('latin1'), dtype=dtype) + return np.fromstring(values, dtype=dtype) def encode(obj): @@ -261,7 +263,8 @@ def encode(obj): 'name': getattr(obj, 'name', None), 'freq': getattr(obj, 'freqstr', None), 'dtype': obj.dtype.num, - 'data': convert(obj.asi8)} + 'data': convert(obj.asi8), + 'compress': compressor} elif isinstance(obj, DatetimeIndex): tz = getattr(obj, 'tz', None) @@ -275,19 +278,22 @@ def encode(obj): 'dtype': obj.dtype.num, 'data': convert(obj.asi8), 'freq': getattr(obj, 'freqstr', None), - 'tz': tz} + 'tz': tz, + 'compress': compressor} elif isinstance(obj, MultiIndex): return {'typ': 'multi_index', 'klass': obj.__class__.__name__, 'names': getattr(obj, 'names', None), 'dtype': obj.dtype.num, - 'data': convert(obj.values)} + 'data': convert(obj.values), + 'compress': compressor} else: 
return {'typ': 'index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), 'dtype': obj.dtype.num, - 'data': convert(obj.values)} + 'data': convert(obj.values), + 'compress': compressor} elif isinstance(obj, Series): if isinstance(obj, SparseSeries): raise NotImplementedError( diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 9633f567ab098..992de2b67bf49 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -446,6 +446,37 @@ def test_sparse_panel(self): check_panel_type=True) +class TestCompression(TestPackers): + + def setUp(self): + super(TestCompression, self).setUp() + data = { + 'A': np.arange(1000, dtype=float), + 'B': range(1000), + 'C': list(100 * 'abcdefghij'), + } + self.frame = { + 'float': DataFrame(dict([(k, data[k]) for k in ['A', 'A']])), + 'int': DataFrame(dict([(k, data[k]) for k in ['B', 'B']])), + 'mixed': DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C']])), + } + + def test_plain(self): + i_rec = self.encode_decode(self.frame, compress='zlib') + for k in self.frame.keys(): + assert_frame_equal(self.frame[k], i_rec[k]) + + def test_compression_zlib(self): + i_rec = self.encode_decode(self.frame, compress='zlib') + for k in self.frame.keys(): + assert_frame_equal(self.frame[k], i_rec[k]) + + def test_compression_blosc(self): + i_rec = self.encode_decode(self.frame, compress='blosc') + for k in self.frame.keys(): + assert_frame_equal(self.frame[k], i_rec[k]) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 7e8d6dd05ef508cc5a089a36b50871ac1524492e Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Wed, 1 Apr 2015 23:03:54 -0400 Subject: [PATCH 009/239] Make test_plain actually plain --- pandas/io/tests/test_packers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 992de2b67bf49..74ef8bf20f70c 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -462,7 +462,7 @@ def setUp(self): } def test_plain(self): - i_rec = self.encode_decode(self.frame, compress='zlib') + i_rec = self.encode_decode(self.frame) for k in self.frame.keys(): assert_frame_equal(self.frame[k], i_rec[k]) From 6ce2447e765d87eb340fbc83d98af05f06d3be0a Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Wed, 1 Apr 2015 23:49:26 -0400 Subject: [PATCH 010/239] Add blosc to .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 6c4d6897a69de..1b1c797fa7e08 100644 --- a/.travis.yml +++ b/.travis.yml @@ -121,6 +121,7 @@ before_install: - echo $VIRTUAL_ENV - export PATH="$HOME/miniconda/bin:$PATH" - sudo apt-get install ccache + - pip install -U blosc - df -h - date - pwd From 6aab111269e71fc28a3bd08fd5a42df058786dfd Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 2 Apr 2015 00:28:55 -0400 Subject: [PATCH 011/239] Add blosc to requirements --- .travis.yml | 1 - ci/requirements-2.6.txt | 1 + ci/requirements-2.7.txt | 1 + ci/requirements-2.7_32.txt | 1 + ci/requirements-2.7_64.txt | 1 + ci/requirements-2.7_LOCALE.txt | 1 + ci/requirements-2.7_SLOW.txt | 1 + ci/requirements-3.2.txt | 1 + ci/requirements-3.3.txt | 1 + ci/requirements-3.4.txt | 1 + ci/requirements-3.4_32.txt | 1 + ci/requirements-3.4_64.txt | 1 + ci/requirements-3.4_SLOW.txt | 1 + 13 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1b1c797fa7e08..6c4d6897a69de 100644 
--- a/.travis.yml +++ b/.travis.yml @@ -121,7 +121,6 @@ before_install: - echo $VIRTUAL_ENV - export PATH="$HOME/miniconda/bin:$PATH" - sudo apt-get install ccache - - pip install -U blosc - df -h - date - pwd diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index 9b338cee26801..a26f260ab5841 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -14,3 +14,4 @@ numexpr=1.4.2 pymysql=0.6.0 sqlalchemy=0.7.8 xlsxwriter=0.4.6 +blosc diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 0d515f300f5a7..5b1991c8810ce 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -23,3 +23,4 @@ beautiful-soup=4.2.1 httplib2=0.8 python-gflags=2.0 google-api-python-client=1.2 +blosc diff --git a/ci/requirements-2.7_32.txt b/ci/requirements-2.7_32.txt index 01b305bb6f21a..9e6dec6048d35 100644 --- a/ci/requirements-2.7_32.txt +++ b/ci/requirements-2.7_32.txt @@ -9,3 +9,4 @@ matplotlib openpyxl xlrd scipy +blosc diff --git a/ci/requirements-2.7_64.txt b/ci/requirements-2.7_64.txt index 01b305bb6f21a..9e6dec6048d35 100644 --- a/ci/requirements-2.7_64.txt +++ b/ci/requirements-2.7_64.txt @@ -9,3 +9,4 @@ matplotlib openpyxl xlrd scipy +blosc diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt index 6c70bfd77ff3f..f5e46f3ddade8 100644 --- a/ci/requirements-2.7_LOCALE.txt +++ b/ci/requirements-2.7_LOCALE.txt @@ -16,3 +16,4 @@ scipy=0.11.0 beautiful-soup=4.2.1 statsmodels=0.4.3 bigquery=2.0.17 +blosc diff --git a/ci/requirements-2.7_SLOW.txt b/ci/requirements-2.7_SLOW.txt index a1ecbceda40dd..2b84fd7b095b2 100644 --- a/ci/requirements-2.7_SLOW.txt +++ b/ci/requirements-2.7_SLOW.txt @@ -23,3 +23,4 @@ beautiful-soup httplib2 python-gflags google-api-python-client +blosc diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt index 9ba8fd7ca9393..bd7bd3c7e7219 100644 --- a/ci/requirements-3.2.txt +++ b/ci/requirements-3.2.txt @@ -13,3 +13,4 @@ html5lib scipy==0.12.0 beautifulsoup4==4.2.1 statsmodels==0.5.0 +blosc diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index c9beec81236fb..0a5cba36b79d3 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -15,3 +15,4 @@ lxml=3.2.1 scipy beautiful-soup=4.2.1 statsmodels +blosc diff --git a/ci/requirements-3.4.txt b/ci/requirements-3.4.txt index 8a55c0458688e..6ba917154e31d 100644 --- a/ci/requirements-3.4.txt +++ b/ci/requirements-3.4.txt @@ -17,3 +17,4 @@ sqlalchemy bottleneck pymysql==0.6.3 psycopg2 +blosc diff --git a/ci/requirements-3.4_32.txt b/ci/requirements-3.4_32.txt index e9dfe9f0ee19e..bac033f7275c8 100644 --- a/ci/requirements-3.4_32.txt +++ b/ci/requirements-3.4_32.txt @@ -8,3 +8,4 @@ scipy numexpr pytables matplotlib +blosc diff --git a/ci/requirements-3.4_64.txt b/ci/requirements-3.4_64.txt index e9dfe9f0ee19e..bac033f7275c8 100644 --- a/ci/requirements-3.4_64.txt +++ b/ci/requirements-3.4_64.txt @@ -8,3 +8,4 @@ scipy numexpr pytables matplotlib +blosc diff --git a/ci/requirements-3.4_SLOW.txt b/ci/requirements-3.4_SLOW.txt index 930cf126b7da0..c6d904cb4c314 100644 --- a/ci/requirements-3.4_SLOW.txt +++ b/ci/requirements-3.4_SLOW.txt @@ -17,3 +17,4 @@ sqlalchemy bottleneck pymysql psycopg2 +blosc From 9214a00d4ccc7e05dd06250b36b04697b7201cfd Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 2 Apr 2015 08:25:35 -0400 Subject: [PATCH 012/239] Diagnose why blosc is not importing --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 6c4d6897a69de..ca225b2513ab7 100644 --- a/.travis.yml +++ 
b/.travis.yml @@ -126,6 +126,7 @@ before_install: - pwd - uname -a - python -V + - python -c "import blosc; blosc.print_versions()" - ci/before_install.sh # Xvfb stuff for clipboard functionality; see the travis-ci documentation - export DISPLAY=:99.0 From 660a675160d8e6743505c713d9d81b67e4fbcf4d Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 2 Apr 2015 08:40:25 -0400 Subject: [PATCH 013/239] Get travis to cooperate --- .travis.yml | 1 - ci/install_pydata.sh | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ca225b2513ab7..6c4d6897a69de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -126,7 +126,6 @@ before_install: - pwd - uname -a - python -V - - python -c "import blosc; blosc.print_versions()" - ci/before_install.sh # Xvfb stuff for clipboard functionality; see the travis-ci documentation - export DISPLAY=:99.0 diff --git a/ci/install_pydata.sh b/ci/install_pydata.sh index 33a6d3854da22..a52446043b3b8 100755 --- a/ci/install_pydata.sh +++ b/ci/install_pydata.sh @@ -92,6 +92,8 @@ rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt time pip install $PIP_ARGS -r ci/requirements-${wheel_box}.txt +python -c 'import blosc; blosc.print_versions()' + # Need to enable for locale testing. The location of the locale file(s) is # distro specific. For example, on Arch Linux all of the locales are in a # commented file--/etc/locale.gen--that must be commented in to be used From f811a1aa7ebc74e97a4c84ffb4257d86b666e030 Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 2 Apr 2015 08:54:13 -0400 Subject: [PATCH 014/239] Make travis listen --- ci/install_pydata.sh | 2 -- ci/script.sh | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/install_pydata.sh b/ci/install_pydata.sh index a52446043b3b8..33a6d3854da22 100755 --- a/ci/install_pydata.sh +++ b/ci/install_pydata.sh @@ -92,8 +92,6 @@ rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt time pip install $PIP_ARGS -r ci/requirements-${wheel_box}.txt -python -c 'import blosc; blosc.print_versions()' - # Need to enable for locale testing. The location of the locale file(s) is # distro specific. 
For example, on Arch Linux all of the locales are in a # commented file--/etc/locale.gen--that must be commented in to be used diff --git a/ci/script.sh b/ci/script.sh index b1ba7ba79c816..6d747ee8a447b 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -16,6 +16,8 @@ fi "$TRAVIS_BUILD_DIR"/ci/build_docs.sh 2>&1 > /tmp/doc.log & # doc build log will be shown after tests +pip install -U blosc +python -c 'import blosc; blosc.print_versions()' echo nosetests --exe -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml nosetests --exe -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml From adc24445c5da1daa6a7791fd75ba9c0fc482887d Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 2 Apr 2015 10:23:17 -0400 Subject: [PATCH 015/239] Respond to comments from jreback --- ci/requirements-2.6.txt | 1 - ci/requirements-2.7.txt | 1 - ci/requirements-2.7_32.txt | 1 - ci/requirements-2.7_64.txt | 1 - ci/requirements-2.7_LOCALE.txt | 1 - ci/requirements-2.7_SLOW.txt | 1 - ci/requirements-3.2.txt | 1 - ci/requirements-3.3.txt | 1 - ci/requirements-3.4.txt | 1 - ci/requirements-3.4_32.txt | 1 - ci/requirements-3.4_64.txt | 1 - ci/requirements-3.4_SLOW.txt | 1 - ci/script.sh | 2 +- pandas/io/tests/test_packers.py | 14 +++++++++----- 14 files changed, 10 insertions(+), 18 deletions(-) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index a26f260ab5841..9b338cee26801 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -14,4 +14,3 @@ numexpr=1.4.2 pymysql=0.6.0 sqlalchemy=0.7.8 xlsxwriter=0.4.6 -blosc diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 5b1991c8810ce..0d515f300f5a7 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -23,4 +23,3 @@ beautiful-soup=4.2.1 httplib2=0.8 python-gflags=2.0 google-api-python-client=1.2 -blosc diff --git a/ci/requirements-2.7_32.txt b/ci/requirements-2.7_32.txt index 9e6dec6048d35..01b305bb6f21a 100644 --- a/ci/requirements-2.7_32.txt +++ b/ci/requirements-2.7_32.txt @@ -9,4 +9,3 @@ matplotlib openpyxl xlrd scipy -blosc diff --git a/ci/requirements-2.7_64.txt b/ci/requirements-2.7_64.txt index 9e6dec6048d35..01b305bb6f21a 100644 --- a/ci/requirements-2.7_64.txt +++ b/ci/requirements-2.7_64.txt @@ -9,4 +9,3 @@ matplotlib openpyxl xlrd scipy -blosc diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt index f5e46f3ddade8..6c70bfd77ff3f 100644 --- a/ci/requirements-2.7_LOCALE.txt +++ b/ci/requirements-2.7_LOCALE.txt @@ -16,4 +16,3 @@ scipy=0.11.0 beautiful-soup=4.2.1 statsmodels=0.4.3 bigquery=2.0.17 -blosc diff --git a/ci/requirements-2.7_SLOW.txt b/ci/requirements-2.7_SLOW.txt index 2b84fd7b095b2..a1ecbceda40dd 100644 --- a/ci/requirements-2.7_SLOW.txt +++ b/ci/requirements-2.7_SLOW.txt @@ -23,4 +23,3 @@ beautiful-soup httplib2 python-gflags google-api-python-client -blosc diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt index bd7bd3c7e7219..9ba8fd7ca9393 100644 --- a/ci/requirements-3.2.txt +++ b/ci/requirements-3.2.txt @@ -13,4 +13,3 @@ html5lib scipy==0.12.0 beautifulsoup4==4.2.1 statsmodels==0.5.0 -blosc diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index 0a5cba36b79d3..c9beec81236fb 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -15,4 +15,3 @@ lxml=3.2.1 scipy beautiful-soup=4.2.1 statsmodels -blosc diff --git a/ci/requirements-3.4.txt b/ci/requirements-3.4.txt index 6ba917154e31d..8a55c0458688e 100644 --- a/ci/requirements-3.4.txt +++ b/ci/requirements-3.4.txt @@ -17,4 +17,3 @@ sqlalchemy 
bottleneck pymysql==0.6.3 psycopg2 -blosc diff --git a/ci/requirements-3.4_32.txt b/ci/requirements-3.4_32.txt index bac033f7275c8..e9dfe9f0ee19e 100644 --- a/ci/requirements-3.4_32.txt +++ b/ci/requirements-3.4_32.txt @@ -8,4 +8,3 @@ scipy numexpr pytables matplotlib -blosc diff --git a/ci/requirements-3.4_64.txt b/ci/requirements-3.4_64.txt index bac033f7275c8..e9dfe9f0ee19e 100644 --- a/ci/requirements-3.4_64.txt +++ b/ci/requirements-3.4_64.txt @@ -8,4 +8,3 @@ scipy numexpr pytables matplotlib -blosc diff --git a/ci/requirements-3.4_SLOW.txt b/ci/requirements-3.4_SLOW.txt index c6d904cb4c314..930cf126b7da0 100644 --- a/ci/requirements-3.4_SLOW.txt +++ b/ci/requirements-3.4_SLOW.txt @@ -17,4 +17,3 @@ sqlalchemy bottleneck pymysql psycopg2 -blosc diff --git a/ci/script.sh b/ci/script.sh index 6d747ee8a447b..e1f71e70ded69 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -16,7 +16,7 @@ fi "$TRAVIS_BUILD_DIR"/ci/build_docs.sh 2>&1 > /tmp/doc.log & # doc build log will be shown after tests -pip install -U blosc +pip install -U blosc # See https://github.com/pydata/pandas/pull/9783 python -c 'import blosc; blosc.print_versions()' echo nosetests --exe -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 74ef8bf20f70c..d85e75f5d2818 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -447,18 +447,22 @@ def test_sparse_panel(self): class TestCompression(TestPackers): + """See https://github.com/pydata/pandas/pull/9783 + """ def setUp(self): super(TestCompression, self).setUp() data = { - 'A': np.arange(1000, dtype=float), - 'B': range(1000), + 'A': np.arange(1000, dtype=np.float64), + 'B': np.arange(1000, dtype=np.int32), 'C': list(100 * 'abcdefghij'), + 'D': date_range(datetime.datetime(2015, 4, 1), periods=1000), + 'E': [datetime.timedelta(days=x) for x in range(1000)], } self.frame = { - 'float': DataFrame(dict([(k, data[k]) for k in ['A', 'A']])), - 'int': DataFrame(dict([(k, data[k]) for k in ['B', 'B']])), - 'mixed': DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C']])), + 'float': DataFrame(dict((k, data[k]) for k in ['A', 'A'])), + 'int': DataFrame(dict((k, data[k]) for k in ['B', 'B'])), + 'mixed': DataFrame(data), } def test_plain(self): From bf64ed9ceaa20b61cc0d741c3f106fd7858bca76 Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 2 Apr 2015 10:35:29 -0400 Subject: [PATCH 016/239] Restore spacing From b0b1c265a3729339de77b96772c07134dcdce1ef Mon Sep 17 00:00:00 2001 From: dsm054 Date: Sat, 28 Mar 2015 18:35:51 -0400 Subject: [PATCH 017/239] BUG: DataFrame.equals should not care about block order (GH #9330) --- doc/source/whatsnew/v0.16.1.txt | 1 + pandas/core/internals.py | 14 +++++++++++++- pandas/io/tests/test_pytables.py | 18 ++++++++++++++++-- pandas/tests/test_frame.py | 14 ++++++++++++++ pandas/tests/test_internals.py | 25 ++++++++++++++++++++++--- 5 files changed, 66 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 160778ed9f8ca..1f4b1284e5509 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -71,6 +71,7 @@ Bug Fixes - Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) +- Bug in ``equals`` causing false negatives when block order differed (:issue:`9330`) - Bug in ``DataFrame`` slicing may not retain metadata (:issue:`9776`) - Bug where ``TimdeltaIndex`` were 
not properly serialized in fixed ``HDFStore`` (:issue:`9635`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7a16fb2b6b0d7..9b2d366bfb2be 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3310,8 +3310,20 @@ def equals(self, other): return False self._consolidate_inplace() other._consolidate_inplace() + if len(self.blocks) != len(other.blocks): + return False + + # canonicalize block order, using a tuple combining the type + # name and then mgr_locs because there might be unconsolidated + # blocks (say, Categorical) which can only be distinguished by + # the iteration order + def canonicalize(block): + return (block.dtype.name, block.mgr_locs.as_array.tolist()) + + self_blocks = sorted(self.blocks, key=canonicalize) + other_blocks = sorted(other.blocks, key=canonicalize) return all(block.equals(oblock) for block, oblock in - zip(self.blocks, other.blocks)) + zip(self_blocks, other_blocks)) class SingleBlockManager(BlockManager): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index acdc991c92efe..03e7a8eae549d 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4584,19 +4584,33 @@ def test_duplicate_column_name(self): with ensure_clean_path(self.path) as path: self.assertRaises(ValueError, df.to_hdf, path, 'df', format='fixed') + df.to_hdf(path, 'df', format='table') + other = read_hdf(path, 'df') + + tm.assert_frame_equal(df, other) + self.assertTrue(df.equals(other)) + self.assertTrue(other.equals(df)) + + def test_round_trip_equals(self): + # GH 9330 + df = DataFrame({"B": [1,2], "A": ["x","y"]}) + + with ensure_clean_path(self.path) as path: df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) + self.assertTrue(df.equals(other)) + self.assertTrue(other.equals(df)) def test_preserve_timedeltaindex_type(self): - # GH9635 + # GH9635 # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve # the type of the index. 
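# A minimal sketch of the GH 9330 contract that the block canonicalization
# above guarantees (mirrors the test_frame test added in this patch; assumes
# the patch is applied): the same values held in a different internal block
# order still compare equal, in both directions.
import pandas as pd
df0 = pd.DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
df1 = df0.reset_index()[["A", "B", "C"]]   # same data, blocks rearranged
assert df0.equals(df1) and df1.equals(df0)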
df = DataFrame(np.random.normal(size=(10,5))) df.index = timedelta_range(start='0s',periods=10,freq='1s',name='example') with ensure_clean_store(self.path) as store: - + store['df'] = df assert_frame_equal(store['df'], df) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index cdda087b27613..3e4c16f63035f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5944,6 +5944,20 @@ def test_boolean_comparison(self): self.assertRaises(ValueError, lambda : df == (2,2)) self.assertRaises(ValueError, lambda : df == [2,2]) + def test_equals_different_blocks(self): + # GH 9330 + df0 = pd.DataFrame({"A": ["x","y"], "B": [1,2], + "C": ["w","z"]}) + df1 = df0.reset_index()[["A","B","C"]] + # this assert verifies that the above operations have + # induced a block rearrangement + self.assertTrue(df0._data.blocks[0].dtype != + df1._data.blocks[0].dtype) + # do the real tests + self.assert_frame_equal(df0, df1) + self.assertTrue(df0.equals(df1)) + self.assertTrue(df1.equals(df0)) + def test_to_csv_from_csv(self): pname = '__tmp_to_csv_from_csv__' diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 45f089f5e0a53..36585abd1b98f 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -68,15 +68,15 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): elif typestr in ('object', 'string', 'O'): values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset], shape) - elif typestr in ('bool'): + elif typestr in ('b','bool',): values = np.ones(shape, dtype=np.bool_) elif typestr in ('datetime', 'dt', 'M8[ns]'): values = (mat * 1e9).astype('M8[ns]') elif typestr in ('timedelta', 'td', 'm8[ns]'): values = (mat * 1).astype('m8[ns]') - elif typestr in ('category'): + elif typestr in ('category',): values = Categorical([1,1,2,2,3,3,3,3,4,4]) - elif typestr in ('category2'): + elif typestr in ('category2',): values = Categorical(['a','a','a','a','b','b','c','c','c','d']) elif typestr in ('sparse', 'sparse_na'): # FIXME: doesn't support num_rows != 10 @@ -751,6 +751,25 @@ def test_equals(self): bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) self.assertTrue(bm1.equals(bm2)) + def test_equals_block_order_different_dtypes(self): + # GH 9330 + + mgr_strings = [ + "a:i8;b:f8", # basic case + "a:i8;b:f8;c:c8;d:b", # many types + "a:i8;e:dt;f:td;g:string", # more types + "a:i8;b:category;c:category2;d:category2", # categories + "c:sparse;d:sparse_na;b:f8", # sparse + ] + + for mgr_string in mgr_strings: + bm = create_mgr(mgr_string) + block_perms = itertools.permutations(bm.blocks) + for bm_perm in block_perms: + bm_this = BlockManager(bm_perm, bm.axes) + self.assertTrue(bm.equals(bm_this)) + self.assertTrue(bm_this.equals(bm)) + def test_single_mgr_ctor(self): mgr = create_single_mgr('f8', num_rows=5) self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.]) From 7375ec430af7bc40e237e30428949babdce27690 Mon Sep 17 00:00:00 2001 From: Tomaz Berisa Date: Fri, 3 Apr 2015 16:12:44 -0400 Subject: [PATCH 018/239] BUG: Fix for #9764 Values from range [1e-7, 5e-7] (for display.precision=7) not displaying 0 anymore --- pandas/core/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index b21ca9050ffd0..7b8a3161b5e05 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1996,7 +1996,7 @@ def _format_strings(self): # this is pretty arbitrary for now has_large_values = (abs_vals > 1e8).any() - has_small_values = ((abs_vals < 10 ** 
(-self.digits)) & + has_small_values = ((abs_vals < 10 ** (-self.digits+1)) & (abs_vals > 0)).any() if too_long and has_large_values: From 2296520f93d84ef1eb13c29532fe331291973998 Mon Sep 17 00:00:00 2001 From: Tomaz Berisa Date: Fri, 3 Apr 2015 17:29:56 -0400 Subject: [PATCH 019/239] TST: Test for #9764 fix --- pandas/tests/test_format.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index ce32c8af99a73..145fa78b387cd 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2986,6 +2986,20 @@ def test_format(self): self.assertEqual(result[0], " 12") self.assertEqual(result[1], " 0") + def test_output_significant_digits(self): + # relevant to issue #9764 + d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]}) + + expected_output={ + (0,6):' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + (1,6):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + (1,8):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', + (8,16):' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', + (9,16):' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' + } + + for k, v in expected_output.items(): + self.assertEqual(d[k[0]:k[1]].__str__(), v) class TestRepr_timedelta64(tm.TestCase): From 00907a97b7ca03d0e16e09f40327e296de7a1f95 Mon Sep 17 00:00:00 2001 From: Tomaz Berisa Date: Fri, 3 Apr 2015 17:48:38 -0400 Subject: [PATCH 020/239] TST: Test saves and restore context (#9764) --- pandas/tests/test_format.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 145fa78b387cd..e3f458690d5f7 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2987,7 +2987,13 @@ def test_format(self): self.assertEqual(result[1], " 0") def test_output_significant_digits(self): - # relevant to issue #9764 + # Issue #9764 + + # In case default display precision changes: + saved_option=pd.get_option('display.precision') + pd.set_option('display.precision', 7) + + # DataFrame from issue #9764 d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]}) expected_output={ @@ -3001,6 +3007,9 @@ def test_output_significant_digits(self): for k, v in expected_output.items(): self.assertEqual(d[k[0]:k[1]].__str__(), v) + # Restore precision + pd.set_option('display.precision', saved_option) + class TestRepr_timedelta64(tm.TestCase): def test_none(self): From 59ea8cc81c663a786bc87b31a5b4c831b7b68f55 Mon Sep 17 00:00:00 2001 From: Tomaz Berisa Date: Sat, 4 Apr 2015 17:39:59 -0400 Subject: [PATCH 021/239] CLN: Test code cleanup (#9764) --- pandas/tests/test_format.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index e3f458690d5f7..1dcdbf12a6b59 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2990,25 +2990,21 @@ def test_output_significant_digits(self): # Issue #9764 # In case default display precision changes: 
- saved_option=pd.get_option('display.precision') - pd.set_option('display.precision', 7) + with pd.option_context('display.precision', 7): + # DataFrame example from issue #9764 + d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]}) + + expected_output={ + (0,6):' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + (1,6):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + (1,8):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', + (8,16):' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', + (9,16):' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' + } + + for (start, stop), v in expected_output.items(): + self.assertEqual(str(d[start:stop]), v) - # DataFrame from issue #9764 - d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]}) - - expected_output={ - (0,6):' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', - (1,6):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', - (1,8):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', - (8,16):' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', - (9,16):' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' - } - - for k, v in expected_output.items(): - self.assertEqual(d[k[0]:k[1]].__str__(), v) - - # Restore precision - pd.set_option('display.precision', saved_option) class TestRepr_timedelta64(tm.TestCase): From d5be1059fdc3df9438dde35c6fd22528337ade3e Mon Sep 17 00:00:00 2001 From: Tomaz Berisa Date: Sat, 4 Apr 2015 17:54:32 -0400 Subject: [PATCH 022/239] DOC: Update whatsnew for 0.16.1 (#9764) --- doc/source/whatsnew/v0.16.1.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 1f4b1284e5509..a20e901dbddd1 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -81,5 +81,9 @@ Bug Fixes - Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) - Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) +<<<<<<< HEAD - Bug in ``ParserBase.convert_to_nd_arrays`` when called by ``DataFrame.read_fwf`` (:issue:`9266``) +======= +- Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`) +>>>>>>> ce988b4... 
DOC: Update whatsnew for 0.16.1 (#9764) From 22f62c7941bad30f253628510f2ea69265a52c05 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Apr 2015 13:12:11 -0400 Subject: [PATCH 023/239] DOC: add dev environment creation details to contributing.rst --- CONTRIBUTING.md | 79 ++++++++++++++++++++++++++++++-- ci/requirements_all.txt | 21 +++++++++ ci/requirements_dev.txt | 5 ++ doc/source/contributing.rst | 91 +++++++++++++++++++++++++++++++++++-- doc/source/install.rst | 46 ++----------------- 5 files changed, 192 insertions(+), 50 deletions(-) create mode 100644 ci/requirements_all.txt create mode 100644 ci/requirements_dev.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f7041dbabdad5..d3eeb820a12eb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -137,6 +137,69 @@ clear what the branch brings to *pandas*. You can have many shiny-new-features and switch in between them using the git checkout command. +### Creating a Development Environment + +An easy way to create a *pandas* development environment is as follows. + +- Install either Install Anaconda \ or + Install miniconda \ +- Make sure that you have + cloned the repository \ +- `cd` to the pandas source directory + +Tell `conda` to create a new environment, named `pandas_dev`, or any +name you would like for this environment by running: + + conda create -n pandas_dev --file ci/requirements_dev.txt + +For a python 3 environment + + conda create -n pandas_dev python=3 --file ci/requirements_dev.txt + +If you are on `windows`, then you will need to install the compiler +linkages: + + conda install -n pandas_dev libpython + +This will create the new environment, and not touch any of your existing +environments, nor any existing python installation. It will install all +of the basic dependencies of *pandas*, as well as the development and +testing tools. If you would like to install other dependencies, you can +install them as follows: + + conda install -n pandas_dev -c pandas pytables scipy + +To install *all* pandas dependencies you can do the following: + + conda install -n pandas_dev -c pandas --file ci/requirements_all.txt + +To work in this environment, `activate` it as follows: + + activate pandas_dev + +At which point, the prompt will change to indicate you are in the new +development environment. + +> **note** +> +> The above syntax is for `windows` environments. To work on +> `macosx/linux`, use: +> +> source activate pandas_dev + +To view your environments: + + conda info -e + +To return to you home root environment: + + deactivate + +See the full `conda` docs [here](http://conda.pydata.org/docs). + +At this point you can easily do an *in-place* install, as detailed in +the next section. + ### Making changes Before making your code changes, it is often necessary to build the code @@ -231,13 +294,19 @@ docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of `numpydoc` is included in the *pandas* source code. +It is easiest to +create a development environment \, then +install: + + conda install -n pandas_dev sphinx ipython + Furthermore, it is recommended to have all [optional dependencies](http://pandas.pydata.org/pandas-docs/dev/install.html#optional-dependencies) -installed. This is not needed, but be aware that you will see some error -messages. Because all the code in the documentation is executed during -the doc build, the examples using this optional dependencies will -generate errors. 
Run `pd.show_versions()` to get an overview of the -installed version of all dependencies. +installed. This is not strictly necessary, but be aware that you will +see some error messages. Because all the code in the documentation is +executed during the doc build, the examples using this optional +dependencies will generate errors. Run `pd.show_versions()` to get an +overview of the installed version of all dependencies. > **warning** > diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt new file mode 100644 index 0000000000000..c70efed96a8dd --- /dev/null +++ b/ci/requirements_all.txt @@ -0,0 +1,21 @@ +nose +sphinx +ipython +dateutil +pytz +openpyxl +xlsxwriter +xlrd +html5lib +patsy +beautiful-soup +numpy +cython +scipy +numexpr +pytables +matplotlib +lxml +sqlalchemy +bottleneck +pymysql diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt new file mode 100644 index 0000000000000..b273ca043c4a2 --- /dev/null +++ b/ci/requirements_dev.txt @@ -0,0 +1,5 @@ +dateutil +pytz +numpy +cython +nose diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index b3b2d272e66c6..cc4473e8d355a 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -96,6 +96,8 @@ Getting Started with Git setting up your SSH key, and configuring git. All these steps need to be completed before working seamlessly with your local repository and GitHub. +.. _contributing.forking: + Forking ------- @@ -132,6 +134,84 @@ changes in this branch specific to one bug or feature so it is clear what the branch brings to *pandas*. You can have many shiny-new-features and switch in between them using the git checkout command. +.. _contributing.dev_env: + +Creating a Development Environment +---------------------------------- + +An easy way to create a *pandas* development environment is as follows. + +- Install either :ref:`Install Anaconda ` or :ref:`Install miniconda ` +- Make sure that you have :ref:`cloned the repository ` +- ``cd`` to the pandas source directory + +Tell ``conda`` to create a new environment, named ``pandas_dev``, or any name you would like for this environment by running: + +:: + + conda create -n pandas_dev --file ci/requirements_dev.txt + + +For a python 3 environment + +:: + + conda create -n pandas_dev python=3 --file ci/requirements_dev.txt + + +If you are on ``windows``, then you will need to install the compiler linkages: + +:: + + conda install -n pandas_dev libpython + +This will create the new environment, and not touch any of your existing environments, nor any existing python installation. It will install all of the basic dependencies of *pandas*, as well as the development and testing tools. If you would like to install other dependencies, you can install them as follows: + +:: + + conda install -n pandas_dev -c pandas pytables scipy + +To install *all* pandas dependencies you can do the following: + +:: + + conda install -n pandas_dev -c pandas --file ci/requirements_all.txt + +To work in this environment, ``activate`` it as follows: + +:: + + activate pandas_dev + +At which point, the prompt will change to indicate you are in the new development environment. + +.. note:: + + The above syntax is for ``windows`` environments. To work on ``macosx/linux``, use: + + :: + + source activate pandas_dev + +To view your environments: + +:: + + conda info -e + +To return to you home root environment: + +:: + + deactivate + +See the full ``conda`` docs `here +`_. 
+ +At this point you can easily do an *in-place* install, as detailed in the next section. + +.. _contributing.getting_source: + Making changes -------------- @@ -237,9 +317,15 @@ follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of ``numpydoc`` is included in the *pandas* source code. +It is easiest to :ref:`create a development environment `, then install: + +:: + + conda install -n pandas_dev sphinx ipython + Furthermore, it is recommended to have all `optional dependencies `_ -installed. This is not needed, but be aware that you will see some error +installed. This is not strictly necessary, but be aware that you will see some error messages. Because all the code in the documentation is executed during the doc build, the examples using this optional dependencies will generate errors. Run ``pd.show_versions()`` to get an overview of the installed version of all @@ -572,6 +658,3 @@ branch has not actually been merged. The branch will still exist on GitHub, so to delete it there do :: git push origin --delete shiny-new-feature - - - diff --git a/doc/source/install.rst b/doc/source/install.rst index dd9021d0439dc..07c88841e5dcb 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -35,6 +35,8 @@ pandas at all. Simply create an account, and have access to pandas from within your brower via an `IPython Notebook `__ in a few minutes. +.. _install.anaconda + Installing pandas with Anaconda ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -66,6 +68,8 @@ admin rights to install it, it will install in the user's home directory, and this also makes it trivial to delete Anaconda at a later date (just delete that folder). +.. _install.miniconda + Installing pandas with Miniconda ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -173,47 +177,8 @@ Installing using your Linux distribution's package manager. Installing from source ~~~~~~~~~~~~~~~~~~~~~~ -.. note:: - - Installing from the git repository requires a recent installation of `Cython - `__ as the cythonized C sources are no longer checked - into source control. Released source distributions will contain the built C - files. I recommend installing the latest Cython via ``easy_install -U - Cython`` - -The source code is hosted at http://github.com/pydata/pandas, it can be checked -out using git and compiled / installed like so: - -:: - - git clone git://github.com/pydata/pandas.git - cd pandas - python setup.py install - -Make sure you have Cython installed when installing from the repository, -rather then a tarball or pypi. -On Windows, I suggest installing the MinGW compiler suite following the -directions linked to above. Once configured property, run the following on the -command line: - -:: - - python setup.py build --compiler=mingw32 - python setup.py install - -Note that you will not be able to import pandas if you open an interpreter in -the source directory unless you build the C extensions in place: - -:: - - python setup.py build_ext --inplace - -The most recent version of MinGW (any installer dated after 2011-08-03) -has removed the '-mno-cygwin' option but Distutils has not yet been updated to -reflect that. Thus, you may run into an error like "unrecognized command line -option '-mno-cygwin'". Until the bug is fixed in Distutils, you may need to -install a slightly older version of MinGW (2011-08-02 installer). +See the :ref:`contributing documentation ` for complete instructions on building from the git source tree. 
Further, see :ref:`creating a development environment ` if you wish to create a *pandas* development environment.

 Running the test suite
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -354,4 +319,3 @@ Optional Dependencies
    work. Hence, it is highly recommended that you install these. A packaged
    distribution like `Enthought Canopy `__ may be worth considering.
-

From 96adf2c99ebeecc8de65aa5c0b3b0b88c0c8f12f Mon Sep 17 00:00:00 2001
From: David BROCHART
Date: Tue, 17 Mar 2015 16:24:21 +0100
Subject: [PATCH 024/239] Fixed bug #9671 where 'DataFrame.plot()' raised an
 error when both 'color' and 'style' keywords were passed and there was no
 color symbol in the style strings (this should be allowed)

---
 doc/source/whatsnew/v0.16.1.txt |  5 +++++
 pandas/tests/test_graphics.py   | 16 ++++++++++++++
 pandas/tools/plotting.py        | 15 ++++++++++-----
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index a20e901dbddd1..e5d2d78ebc2ad 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -86,4 +86,9 @@ Bug Fixes
 - Bug in ``ParserBase.convert_to_nd_arrays`` when called by ``DataFrame.read_fwf`` (:issue:`9266``)
 =======
 - Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`)
+<<<<<<< HEAD
 >>>>>>> ce988b4... DOC: Update whatsnew for 0.16.1 (#9764)
+=======
+
+- Fixed bug where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (:issue:`9671`)
+>>>>>>> f00d6bb... Fixed bug #9671 where 'DataFrame.plot()' raised an error when both 'color' and 'style' keywords were passed and there was no color symbol in the style strings (this should be allowed)

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index 3ce4e150326a2..36c19cd39f76c 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -1154,6 +1154,22 @@ def test_plot(self):
         self.assertEqual(len(axes), 1)
         self.assertIs(ax.get_axes(), axes[0])

+    def test_color_and_style_arguments(self):
+        df = DataFrame({'x': [1, 2], 'y': [3, 4]})
+        # passing both 'color' and 'style' arguments should be allowed
+        # if there is no color symbol in the style strings:
+        ax = df.plot(color = ['red', 'black'], style = ['-', '--'])
+        # check that the linestyles are correctly set:
+        linestyle = [line.get_linestyle() for line in ax.lines]
+        self.assertEqual(linestyle, ['-', '--'])
+        # check that the colors are correctly set:
+        color = [line.get_color() for line in ax.lines]
+        self.assertEqual(color, ['red', 'black'])
+        # passing both 'color' and 'style' arguments should not be allowed
+        # if there is a color symbol in the style strings:
+        with tm.assertRaises(ValueError):
+            df.plot(color = ['red', 'black'], style = ['k-', 'r--'])
+
     def test_nonnumeric_exclude(self):
         df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]})
         ax = df.plot()

diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 0be030d7c2c8e..358c5b0dd5940 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -867,12 +867,17 @@ def _validate_color_args(self):
                           "simultaneously. 
Using 'color'") if 'color' in self.kwds and self.style is not None: + if com.is_list_like(self.style): + styles = self.style + else: + styles = [self.style] # need only a single match - if re.match('^[a-z]+?', self.style) is not None: - raise ValueError("Cannot pass 'style' string with a color " - "symbol and 'color' keyword argument. Please" - " use one or the other or pass 'style' " - "without a color symbol") + for s in styles: + if re.match('^[a-z]+?', s) is not None: + raise ValueError("Cannot pass 'style' string with a color " + "symbol and 'color' keyword argument. Please" + " use one or the other or pass 'style' " + "without a color symbol") def _iter_data(self, data=None, keep_index=False, fillna=None): if data is None: From 307fe736d58d5aae0e622738660868ca2df3e3da Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 6 Apr 2015 08:24:13 -0400 Subject: [PATCH 025/239] DOC: correction to contributing.rst --- CONTRIBUTING.md | 4 ++-- doc/source/contributing.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d3eeb820a12eb..284ac2fc5b169 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,8 +12,8 @@ navigate to the [GitHub "issues" tab](https://github.com/pydata/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pydata/pandas/issues?labels=Docs&sort=updated&state=open) -and [Good as first -PR](https://github.com/pydata/pandas/issues?labels=Good+as+first+PR&sort=updated&state=open) +and [Difficulty +Novice](https://github.com/pydata/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. Or maybe through using *pandas* you have an idea of you own or are diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index cc4473e8d355a..2112d5b127e64 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -17,8 +17,8 @@ If you are simply looking to start working with the *pandas* codebase, navigate `GitHub "issues" tab `_ and start looking through interesting issues. There are a number of issues listed under `Docs `_ -and `Good as first PR -`_ +and `Difficulty Novice +`_ where you could start out. Or maybe through using *pandas* you have an idea of you own or are looking for something From 34337582c7bddd05e19e980a05aab297c7c171a0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 1 Apr 2015 13:38:29 +0200 Subject: [PATCH 026/239] DOC: fix some various doc warnings --- doc/source/r_interface.rst | 1 + pandas/core/strings.py | 18 ++++++++++-------- pandas/tseries/tools.py | 5 ++++- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index 826d9e980538e..2207c823f43b1 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -56,6 +56,7 @@ appropriate pandas object (most likely a DataFrame): .. ipython:: python + :okwarning: import pandas.rpy.common as com infert = com.load_data('infert') diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 97f6752fb5851..4ef341c481a60 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -769,12 +769,14 @@ def str_rstrip(arr, to_strip=None): def str_wrap(arr, width, **kwargs): - """ - Wrap long strings to be formatted in paragraphs + r""" + Wrap long strings to be formatted in paragraphs. + + This method has the same keyword parameters and defaults as + :class:`textwrap.TextWrapper`. 
Parameters ---------- - Same keyword parameters and defaults as :class:`textwrap.TextWrapper` width : int Maximum line-width expand_tabs : bool, optional @@ -806,11 +808,11 @@ def str_wrap(arr, width, **kwargs): settings. To achieve behavior matching R's stringr library str_wrap function, use the arguments: - expand_tabs = False - replace_whitespace = True - drop_whitespace = True - break_long_words = False - break_on_hyphens = False + - expand_tabs = False + - replace_whitespace = True + - drop_whitespace = True + - break_long_words = False + - break_on_hyphens = False Examples -------- diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 8430e0209fd78..ef37e003ab67f 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -210,10 +210,13 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, Returns ------- - ret : datetime if parsing succeeded. Return type depends on input: + ret : datetime if parsing succeeded. + Return type depends on input: + - list-like: DatetimeIndex - Series: Series of datetime64 dtype - scalar: Timestamp + In case when it is not possible to return designated types (e.g. when any element of input is before Timestamp.min or after Timestamp.max) return will have datetime.datetime type (or correspoding array/Series). From 1678e64f73b4abdfb71601eca0ad30676fcc8680 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 3 Apr 2015 08:43:03 -0400 Subject: [PATCH 027/239] Closes #9795 (Stata writer changes input frame) Add note to release notes --- doc/source/whatsnew/v0.16.1.txt | 2 +- pandas/io/stata.py | 4 +++- pandas/io/tests/test_stata.py | 9 +++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index e5d2d78ebc2ad..de87b42dc5441 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -66,7 +66,7 @@ Bug Fixes - Bug in ``scatter_matrix`` draws unexpected axis ticklabels (:issue:`5662`) - +- Fixed bug in ``StataWriter`` resulting in changes to input ``DataFrame`` upon save (:issue:`9795`). - Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 7dd32fd00a4d2..3972bad7b2d83 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1885,6 +1885,8 @@ def _prepare_pandas(self, data): #NOTE: we might need a different API / class for pandas objects so # we can set different semantics - handle this with a PR to pandas.io + data = data.copy() + if self._write_index: data = data.reset_index() @@ -2013,7 +2015,7 @@ def _write_variable_labels(self, labels=None): self._write(_pad_bytes("", 81)) def _prepare_data(self): - data = self.data.copy() + data = self.data typlist = self.typlist convert_dates = self._convert_dates # 1. 
Convert dates
diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
index 8b44be61d5f66..0aaf018b21584 100644
--- a/pandas/io/tests/test_stata.py
+++ b/pandas/io/tests/test_stata.py
@@ -290,6 +290,15 @@ def test_stata_doc_examples(self):
             df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
             df.to_stata(path)

+    def test_write_preserves_original(self):
+        # 9795
+        np.random.seed(423)
+        df = pd.DataFrame(np.random.randn(5,4), columns=list('abcd'))
+        df.ix[2, 'a':'c'] = np.nan
+        df_copy = df.copy()
+        df.to_stata('test.dta', write_index=False)
+        tm.assert_frame_equal(df, df_copy)
+
     def test_encoding(self):

         # GH 4626, proper encoding handling

From 954a68ef3e3f474718d5494f4bc0c0cf94a5dd85 Mon Sep 17 00:00:00 2001
From: Evan Wright
Date: Tue, 7 Apr 2015 08:33:23 -0400
Subject: [PATCH 028/239] DOC: Clean up documentation for convert_objects

---
 pandas/core/generic.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 012a73fac1ef4..b695ef6550f42 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2261,19 +2261,23 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,

         Parameters
         ----------
-        convert_dates : if True, attempt to soft convert dates, if 'coerce',
-            force conversion (and non-convertibles get NaT)
-        convert_numeric : if True attempt to coerce to numbers (including
-            strings), non-convertibles get NaN
-        convert_timedeltas : if True, attempt to soft convert timedeltas, if 'coerce',
-            force conversion (and non-convertibles get NaT)
-        copy : Boolean, if True, return copy even if no copy is necessary
-            (e.g. no conversion was done), default is True.
-            It is meant for internal use, not to be confused with `inplace` kw.
+        convert_dates : boolean, default True
+            If True, convert to date where possible. If 'coerce', force
+            conversion, with unconvertible values becoming NaT.
+        convert_numeric : boolean, default False
+            If True, attempt to coerce to numbers (including strings), with
+            unconvertible values becoming NaN.
+        convert_timedeltas : boolean, default True
+            If True, convert to timedelta where possible. If 'coerce', force
+            conversion, with unconvertible values becoming NaT.
+        copy : boolean, default True
+            If True, return a copy even if no copy is necessary (e.g. no
+            conversion was done). Note: This is meant for internal use, and
+            should not be confused with inplace.

         Returns
         -------
-        converted : asm as input object
+        converted : same as input object
         """
         return self._constructor(
             self._data.convert(convert_dates=convert_dates,

From d4880936e9bc6f4d075d0a72d4489fc400d3a2d8 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sun, 5 Apr 2015 19:17:04 -0500
Subject: [PATCH 029/239] API: Sort keys for DataFrame.assign

Previously the order was arbitrary. For predictability, we'll sort before
inserting.
---
 doc/source/dsintro.rst          |  6 ++++--
 doc/source/whatsnew/v0.16.1.txt |  4 ++++
 pandas/core/frame.py            | 11 ++++++-----
 pandas/tests/test_frame.py      | 19 ++++++++++++++-----
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst
index e1c14029f1cf9..adcf2fca9b4c5 100644
--- a/doc/source/dsintro.rst
+++ b/doc/source/dsintro.rst
@@ -461,7 +461,7 @@ Inspired by `dplyr's
 `__
 ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign`
 method that allows you to easily create new columns that are potentially
-derived from existing columns.
+derived from existing columns.
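For example, a minimal sketch of the sorted-key behaviour described here (the
column names are illustrative):

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'A': [1, 2, 3]})
   # keyword arguments arrive in arbitrary order, but the new columns are
   # inserted alphabetically by key, so the result is A, B, C
   df.assign(C=df.A * 2, B=lambda x: x.A + 1)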
.. ipython:: python

@@ -511,7 +511,9 @@ DataFrame is returned, with the new values inserted.

 .. warning::

    Since the function signature of ``assign`` is ``**kwargs``, a dictionary,
-   the order of the new columns in the resulting DataFrame cannot be guaranteed.
+   the order of the new columns in the resulting DataFrame cannot be guaranteed
+   to match the order you pass in. To make things predictable, items are inserted
+   alphabetically (by key) at the end of the DataFrame.

 All expressions are computed first, and then assigned. So you can't refer to
 another column being assigned in the same call to ``assign``. For example:

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index de87b42dc5441..38f7cea01066d 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -45,6 +45,10 @@ API changes

 - Add support for separating years and quarters using dashes, for example
   2014-Q1.  (:issue:`9688`)

+- :meth:`~pandas.DataFrame.assign` now inserts new columns in alphabetical order. Previously
+  the order was arbitrary. (:issue:`9777`)
+
+
 .. _whatsnew_0161.performance:

 Performance Improvements

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f700d4316842c..8b683ad89558a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2244,10 +2244,11 @@ def assign(self, **kwargs):
         Notes
         -----
         Since ``kwargs`` is a dictionary, the order of your
-        arguments may not be preserved, and so the order of the
-        new columns is not well defined.  Assigning multiple
-        columns within the same ``assign`` is possible, but you cannot
-        reference other columns created within the same ``assign`` call.
+        arguments may not be preserved. To make things predictable,
+        the columns are inserted in alphabetical order, at the end of
+        your DataFrame. Assigning multiple columns within the same
+        ``assign`` is possible, but you cannot reference other columns
+        created within the same ``assign`` call.

         Examples
         --------
@@ -2296,7 +2297,7 @@ def assign(self, **kwargs):
             results[k] = v

         # ...
and then assign - for k, v in results.items(): + for k, v in sorted(results.items()): data[k] = v return data diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3e4c16f63035f..e4abe15dee493 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -14073,12 +14073,21 @@ def test_assign(self): assert_frame_equal(result, expected) def test_assign_multiple(self): - df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B']) result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) - expected = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9], - 'D': [1, 2, 3], 'E': [4, 5, 6]}) - # column order isn't preserved - assert_frame_equal(result.reindex_like(expected), expected) + expected = DataFrame([[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], + [3, 6, 9, 3, 6]], columns=list('ABCDE')) + assert_frame_equal(result, expected) + + def test_assign_alphabetical(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + result = df.assign(D=df.A + df.B, C=df.A - df.B) + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + assert_frame_equal(result, expected) def test_assign_bad(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) From 604410b1f60c4c53b7d73a93d89c33b84c5f582c Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 6 Apr 2015 20:32:41 -0400 Subject: [PATCH 030/239] BUG: Preserve tz of start_date in Holiday.dates Previously, the timezone of only the start_date was lost, causing the subsequent call to `DatetimeIndex` to fail if a both start_date and end_date were tz-aware. --- pandas/tseries/holiday.py | 5 +- pandas/tseries/tests/test_holiday.py | 181 ++++++++++++++++----------- 2 files changed, 111 insertions(+), 75 deletions(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 3b3542b760d6f..c31e25115c6a4 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -203,7 +203,10 @@ def dates(self, start_date, end_date, return_name=False): end_date = Timestamp(end_date) year_offset = DateOffset(years=1) - base_date = Timestamp(datetime(start_date.year, self.month, self.day)) + base_date = Timestamp( + datetime(start_date.year, self.month, self.day), + tz=start_date.tz, + ) dates = DatetimeIndex(start=base_date, end=end_date, freq=year_offset) holiday_dates = self._apply_rule(dates) if self.days_of_week is not None: diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tseries/tests/test_holiday.py index c2300481eca43..0880e84f1fcde 100644 --- a/pandas/tseries/tests/test_holiday.py +++ b/pandas/tseries/tests/test_holiday.py @@ -9,6 +9,7 @@ HolidayCalendarFactory, next_workday, previous_workday, before_nearest_workday, EasterMonday, GoodFriday, after_nearest_workday, weekend_to_monday) +from pytz import utc import nose class TestCalendar(tm.TestCase): @@ -55,87 +56,119 @@ def setUp(self): self.start_date = datetime(2011, 1, 1) self.end_date = datetime(2020, 12, 31) + def check_results(self, holiday, start, end, expected): + self.assertEqual(list(holiday.dates(start, end)), expected) + # Verify that timezone info is preserved. 
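        # A sketch of what this guards against (dates illustrative): with the
        # fix, USMemorialDay.dates(utc.localize(Timestamp('2011-01-01')),
        # utc.localize(Timestamp('2011-12-31'))) returns UTC-localized
        # Timestamps instead of failing inside the DatetimeIndex construction.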
+ self.assertEqual( + list( + holiday.dates( + utc.localize(Timestamp(start)), + utc.localize(Timestamp(end)), + ) + ), + [utc.localize(dt) for dt in expected], + ) + def test_usmemorialday(self): - holidays = USMemorialDay.dates(self.start_date, - self.end_date) - holidayList = [ - datetime(2011, 5, 30), - datetime(2012, 5, 28), - datetime(2013, 5, 27), - datetime(2014, 5, 26), - datetime(2015, 5, 25), - datetime(2016, 5, 30), - datetime(2017, 5, 29), - datetime(2018, 5, 28), - datetime(2019, 5, 27), - datetime(2020, 5, 25), - ] - self.assertEqual(list(holidays), holidayList) + self.check_results( + holiday=USMemorialDay, + start=self.start_date, + end=self.end_date, + expected=[ + datetime(2011, 5, 30), + datetime(2012, 5, 28), + datetime(2013, 5, 27), + datetime(2014, 5, 26), + datetime(2015, 5, 25), + datetime(2016, 5, 30), + datetime(2017, 5, 29), + datetime(2018, 5, 28), + datetime(2019, 5, 27), + datetime(2020, 5, 25), + ], + ) def test_non_observed_holiday(self): - july_3rd = Holiday('July 4th Eve', month=7, day=3) - result = july_3rd.dates("2001-01-01", "2003-03-03") - expected = [Timestamp('2001-07-03 00:00:00'), - Timestamp('2002-07-03 00:00:00')] - self.assertEqual(list(result), expected) - july_3rd = Holiday('July 4th Eve', month=7, day=3, - days_of_week=(0, 1, 2, 3)) - result = july_3rd.dates("2001-01-01", "2008-03-03") - expected = [Timestamp('2001-07-03 00:00:00'), - Timestamp('2002-07-03 00:00:00'), - Timestamp('2003-07-03 00:00:00'), - Timestamp('2006-07-03 00:00:00'), - Timestamp('2007-07-03 00:00:00')] - self.assertEqual(list(result), expected) + + self.check_results( + Holiday('July 4th Eve', month=7, day=3), + start="2001-01-01", + end="2003-03-03", + expected=[ + Timestamp('2001-07-03 00:00:00'), + Timestamp('2002-07-03 00:00:00') + ] + ) + + self.check_results( + Holiday('July 4th Eve', month=7, day=3, days_of_week=(0, 1, 2, 3)), + start="2001-01-01", + end="2008-03-03", + expected=[ + Timestamp('2001-07-03 00:00:00'), + Timestamp('2002-07-03 00:00:00'), + Timestamp('2003-07-03 00:00:00'), + Timestamp('2006-07-03 00:00:00'), + Timestamp('2007-07-03 00:00:00'), + ] + ) def test_easter(self): - holidays = EasterMonday.dates(self.start_date, - self.end_date) - holidayList = [Timestamp('2011-04-25 00:00:00'), - Timestamp('2012-04-09 00:00:00'), - Timestamp('2013-04-01 00:00:00'), - Timestamp('2014-04-21 00:00:00'), - Timestamp('2015-04-06 00:00:00'), - Timestamp('2016-03-28 00:00:00'), - Timestamp('2017-04-17 00:00:00'), - Timestamp('2018-04-02 00:00:00'), - Timestamp('2019-04-22 00:00:00'), - Timestamp('2020-04-13 00:00:00')] - - - self.assertEqual(list(holidays), holidayList) - holidays = GoodFriday.dates(self.start_date, - self.end_date) - holidayList = [Timestamp('2011-04-22 00:00:00'), - Timestamp('2012-04-06 00:00:00'), - Timestamp('2013-03-29 00:00:00'), - Timestamp('2014-04-18 00:00:00'), - Timestamp('2015-04-03 00:00:00'), - Timestamp('2016-03-25 00:00:00'), - Timestamp('2017-04-14 00:00:00'), - Timestamp('2018-03-30 00:00:00'), - Timestamp('2019-04-19 00:00:00'), - Timestamp('2020-04-10 00:00:00')] - self.assertEqual(list(holidays), holidayList) - + + self.check_results( + EasterMonday, + start=self.start_date, + end=self.end_date, + expected=[ + Timestamp('2011-04-25 00:00:00'), + Timestamp('2012-04-09 00:00:00'), + Timestamp('2013-04-01 00:00:00'), + Timestamp('2014-04-21 00:00:00'), + Timestamp('2015-04-06 00:00:00'), + Timestamp('2016-03-28 00:00:00'), + Timestamp('2017-04-17 00:00:00'), + Timestamp('2018-04-02 00:00:00'), + Timestamp('2019-04-22 
00:00:00'),
+                Timestamp('2020-04-13 00:00:00'),
+            ],
+        )
+        self.check_results(
+            GoodFriday,
+            start=self.start_date,
+            end=self.end_date,
+            expected=[
+                Timestamp('2011-04-22 00:00:00'),
+                Timestamp('2012-04-06 00:00:00'),
+                Timestamp('2013-03-29 00:00:00'),
+                Timestamp('2014-04-18 00:00:00'),
+                Timestamp('2015-04-03 00:00:00'),
+                Timestamp('2016-03-25 00:00:00'),
+                Timestamp('2017-04-14 00:00:00'),
+                Timestamp('2018-03-30 00:00:00'),
+                Timestamp('2019-04-19 00:00:00'),
+                Timestamp('2020-04-10 00:00:00'),
+            ],
+        )

     def test_usthanksgivingday(self):
-        holidays = USThanksgivingDay.dates(self.start_date,
-                                           self.end_date)
-        holidayList = [
-            datetime(2011, 11, 24),
-            datetime(2012, 11, 22),
-            datetime(2013, 11, 28),
-            datetime(2014, 11, 27),
-            datetime(2015, 11, 26),
-            datetime(2016, 11, 24),
-            datetime(2017, 11, 23),
-            datetime(2018, 11, 22),
-            datetime(2019, 11, 28),
-            datetime(2020, 11, 26),
-        ]
-
-        self.assertEqual(list(holidays), holidayList)
+
+        self.check_results(
+            USThanksgivingDay,
+            start=self.start_date,
+            end=self.end_date,
+            expected=[
+                datetime(2011, 11, 24),
+                datetime(2012, 11, 22),
+                datetime(2013, 11, 28),
+                datetime(2014, 11, 27),
+                datetime(2015, 11, 26),
+                datetime(2016, 11, 24),
+                datetime(2017, 11, 23),
+                datetime(2018, 11, 22),
+                datetime(2019, 11, 28),
+                datetime(2020, 11, 26),
+            ],
+        )

     def test_argument_types(self):
         holidays = USThanksgivingDay.dates(self.start_date,

From 8014c246ecfe8dae0592e9c295e4c735fceb5372 Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Sun, 16 Mar 2014 10:29:39 +0900
Subject: [PATCH 031/239] ENH: drop function now has errors keyword for
 non-existing column handling

---
 doc/source/whatsnew/v0.15.2.txt |  2 +-
 doc/source/whatsnew/v0.16.1.txt |  7 ++++
 pandas/core/generic.py          |  8 ++--
 pandas/core/index.py            | 31 ++++++++++------
 pandas/tests/test_frame.py      | 33 +++++++++++++++++
 pandas/tests/test_index.py      | 66 +++++++++++++++++++++++++++++----
 pandas/tests/test_panel.py      |  9 +++++
 pandas/tests/test_series.py     |  8 ++++
 8 files changed, 142 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt
index 02de919e3f83e..6a14a4024ba5a 100644
--- a/doc/source/whatsnew/v0.15.2.txt
+++ b/doc/source/whatsnew/v0.15.2.txt
@@ -49,7 +49,7 @@ API changes

    In [3]: cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c'])

    In [4]: cat
-   Out[4]:
+   Out[4]:
    [a, b, a]
    Categories (3, object): [a < b < c]

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index 38f7cea01066d..53ca4c0c306e1 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -23,6 +23,13 @@ Enhancements

+- ``drop`` function can now accept an ``errors`` keyword to suppress the ``ValueError`` raised when any of the labels does not exist in the target data. (:issue:`6736`)
+
+  .. ipython:: python
+
+    df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C'])
+    df.drop(['A', 'X'], axis=1, errors='ignore')
+
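A minimal sketch of the two modes side by side (labels are illustrative):

.. code-block:: python

   import numpy as np
   import pandas as pd

   df = pd.DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C'])
   # df.drop(['A', 'X'], axis=1)  # default errors='raise': ValueError, 'X' missing
   df.drop(['A', 'X'], axis=1, errors='ignore')  # drops 'A', silently skips 'X'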
 .. _whatsnew_0161.api:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b695ef6550f42..bc65f1f62fa1a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1557,7 +1557,7 @@ def reindex_like(self, other, method=None, copy=True, limit=None):

         return self.reindex(**d)

-    def drop(self, labels, axis=0, level=None, inplace=False):
+    def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
         """
         Return new object with labels in requested axis removed

@@ -1569,6 +1569,8 @@ def drop(self, labels, axis=0, level=None, inplace=False):
             For MultiIndex
         inplace : bool, default False
             If True, do operation inplace and return None.
+        errors : {'ignore', 'raise'}, default 'raise'
+            If 'ignore', suppress the error and only existing labels are dropped.

         Returns
         -------
@@ -1582,9 +1584,9 @@ def drop(self, labels, axis=0, level=None, inplace=False):
         if level is not None:
             if not isinstance(axis, MultiIndex):
                 raise AssertionError('axis must be a MultiIndex')
-            new_axis = axis.drop(labels, level=level)
+            new_axis = axis.drop(labels, level=level, errors=errors)
         else:
-            new_axis = axis.drop(labels)
+            new_axis = axis.drop(labels, errors=errors)

         dropped = self.reindex(**{axis_name: new_axis})
         try:
             dropped.axes[axis_].set_names(axis.names, inplace=True)

diff --git a/pandas/core/index.py b/pandas/core/index.py
index e335d00551bab..fd11cd7f598c3 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -2325,13 +2325,15 @@ def insert(self, loc, item):
             (_self[:loc], item_idx, _self[loc:]))
         return Index(idx, name=self.name)

-    def drop(self, labels):
+    def drop(self, labels, errors='raise'):
         """
         Make new Index with passed list of labels deleted

         Parameters
         ----------
         labels : array-like
+        errors : {'ignore', 'raise'}, default 'raise'
+            If 'ignore', suppress the error and only existing labels are dropped.
Returns ------- @@ -2341,7 +2343,9 @@ def drop(self, labels): indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): - raise ValueError('labels %s not contained in axis' % labels[mask]) + if errors != 'ignore': + raise ValueError('labels %s not contained in axis' % labels[mask]) + indexer = indexer[~mask] return self.delete(indexer) @Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs) @@ -3847,7 +3851,7 @@ def repeat(self, n): sortorder=self.sortorder, verify_integrity=False) - def drop(self, labels, level=None): + def drop(self, labels, level=None, errors='raise'): """ Make new MultiIndex with passed list of labels deleted @@ -3870,19 +3874,24 @@ def drop(self, labels, level=None): indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): - raise ValueError('labels %s not contained in axis' - % labels[mask]) - return self.delete(indexer) + if errors != 'ignore': + raise ValueError('labels %s not contained in axis' + % labels[mask]) + indexer = indexer[~mask] except Exception: pass inds = [] for label in labels: - loc = self.get_loc(label) - if isinstance(loc, int): - inds.append(loc) - else: - inds.extend(lrange(loc.start, loc.stop)) + try: + loc = self.get_loc(label) + if isinstance(loc, int): + inds.append(loc) + else: + inds.extend(lrange(loc.start, loc.stop)) + except KeyError: + if errors != 'ignore': + raise return self.delete(inds) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index e4abe15dee493..b8bdd2d4e3b40 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7423,6 +7423,26 @@ def test_drop_names(self): self.assertEqual(obj.columns.name, 'second') self.assertEqual(list(df.columns), ['d', 'e', 'f']) + self.assertRaises(ValueError, df.drop, ['g']) + self.assertRaises(ValueError, df.drop, ['g'], 1) + + # errors = 'ignore' + dropped = df.drop(['g'], errors='ignore') + expected = Index(['a', 'b', 'c']) + self.assert_index_equal(dropped.index, expected) + + dropped = df.drop(['b', 'g'], errors='ignore') + expected = Index(['a', 'c']) + self.assert_index_equal(dropped.index, expected) + + dropped = df.drop(['g'], axis=1, errors='ignore') + expected = Index(['d', 'e', 'f']) + self.assert_index_equal(dropped.columns, expected) + + dropped = df.drop(['d', 'g'], axis=1, errors='ignore') + expected = Index(['e', 'f']) + self.assert_index_equal(dropped.columns, expected) + def test_dropEmptyRows(self): N = len(self.frame.index) mat = randn(N) @@ -7801,6 +7821,19 @@ def test_drop(self): assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.ix[[2], :]) assert_frame_equal(simple.drop([0, 3], axis='index'), simple.ix[[1, 2], :]) + self.assertRaises(ValueError, simple.drop, 5) + self.assertRaises(ValueError, simple.drop, 'C', 1) + self.assertRaises(ValueError, simple.drop, [1, 5]) + self.assertRaises(ValueError, simple.drop, ['A', 'C'], 1) + + # errors = 'ignore' + assert_frame_equal(simple.drop(5, errors='ignore'), simple) + assert_frame_equal(simple.drop([0, 5], errors='ignore'), + simple.ix[[1, 2, 3], :]) + assert_frame_equal(simple.drop('C', axis=1, errors='ignore'), simple) + assert_frame_equal(simple.drop(['A', 'C'], axis=1, errors='ignore'), + simple[['B']]) + #non-unique - wheee! 
nu_df = DataFrame(lzip(range(3), range(-3, 1), list('abc')), columns=['a', 'a', 'b']) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 39db387045f12..61cb337880c00 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1036,20 +1036,43 @@ def check_slice(in_slice, expected): def test_drop(self): n = len(self.strIndex) - dropped = self.strIndex.drop(self.strIndex[lrange(5, 10)]) + drop = self.strIndex[lrange(5, 10)] + dropped = self.strIndex.drop(drop) expected = self.strIndex[lrange(5) + lrange(10, n)] self.assertTrue(dropped.equals(expected)) self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) + self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar']) + + # errors='ignore' + mixed = drop.tolist() + ['foo'] + dropped = self.strIndex.drop(mixed, errors='ignore') + expected = self.strIndex[lrange(5) + lrange(10, n)] + self.assert_index_equal(dropped, expected) + + dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore') + expected = self.strIndex[lrange(n)] + self.assert_index_equal(dropped, expected) dropped = self.strIndex.drop(self.strIndex[0]) expected = self.strIndex[1:] - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) ser = Index([1, 2, 3]) dropped = ser.drop(1) expected = Index([2, 3]) - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) + + # errors='ignore' + self.assertRaises(ValueError, ser.drop, [3, 4]) + + dropped = ser.drop(4, errors='ignore') + expected = Index([1, 2, 3]) + self.assert_index_equal(dropped, expected) + + dropped = ser.drop([3, 4, 5], errors='ignore') + expected = Index([1, 2]) + self.assert_index_equal(dropped, expected) def test_tuple_union_bug(self): import pandas @@ -3529,21 +3552,50 @@ def test_drop(self): dropped2 = self.index.drop(index) expected = self.index[[0, 2, 3, 5]] - self.assertTrue(dropped.equals(expected)) - self.assertTrue(dropped2.equals(expected)) + self.assert_index_equal(dropped, expected) + self.assert_index_equal(dropped2, expected) dropped = self.index.drop(['bar']) expected = self.index[[0, 1, 3, 4, 5]] - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop('foo') + expected = self.index[[2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) index = MultiIndex.from_tuples([('bar', 'two')]) self.assertRaises(KeyError, self.index.drop, [('bar', 'two')]) self.assertRaises(KeyError, self.index.drop, index) + self.assertRaises(KeyError, self.index.drop, ['foo', 'two']) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) + self.assertRaises(KeyError, self.index.drop, mixed_index) + + # error='ignore' + dropped = self.index.drop(index, errors='ignore') + expected = self.index[[0, 1, 2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop(mixed_index, errors='ignore') + expected = self.index[[0, 1, 2, 3, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop(['foo', 'two'], errors='ignore') + expected = self.index[[2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) # mixed partial / full drop dropped = self.index.drop(['foo', ('qux', 'one')]) expected = self.index[[2, 3, 5]] - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ['foo', ('qux', 'one'), 'two'] + self.assertRaises(KeyError, self.index.drop, mixed_index) 
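        # Sketch of the semantics pinned down here: with labels that are only
        # partially present, the default errors='raise' fails on the first
        # missing label, while errors='ignore' drops whatever it can find and
        # silently skips the rest.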
+ dropped = self.index.drop(mixed_index, errors='ignore') + expected = self.index[[2, 3, 5]] + self.assert_index_equal(dropped, expected) def test_droplevel_with_names(self): index = self.index[self.index.get_loc('foo')] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index cab668b3118fd..0fd03cb5804a8 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1984,6 +1984,15 @@ def check_drop(drop_val, axis_number, aliases, expected): expected = Panel({"One": df}) check_drop('Two', 0, ['items'], expected) + self.assertRaises(ValueError, panel.drop, 'Three') + + # errors = 'ignore' + dropped = panel.drop('Three', errors='ignore') + assert_panel_equal(dropped, panel) + dropped = panel.drop(['Two', 'Three'], errors='ignore') + expected = Panel({"One": df}) + assert_panel_equal(dropped, expected) + # Major exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) expected = Panel({"One": exp_df, "Two": exp_df}) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index c021bb1bf2fd6..f044fe540ea24 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1954,6 +1954,14 @@ def test_drop(self): self.assertRaises(ValueError, s.drop, 'bc') self.assertRaises(ValueError, s.drop, ('a',)) + # errors='ignore' + s = Series(range(3),index=list('abc')) + result = s.drop('bc', errors='ignore') + assert_series_equal(result, s) + result = s.drop(['a', 'd'], errors='ignore') + expected = s.ix[1:] + assert_series_equal(result, expected) + # bad axis self.assertRaises(ValueError, s.drop, 'one', axis='columns') From 4ef0e34351bbe987f2797c982cd5867981b07dd6 Mon Sep 17 00:00:00 2001 From: Evan Wright Date: Wed, 8 Apr 2015 08:15:25 -0400 Subject: [PATCH 032/239] BUG: skiprows doesn't handle blank lines properly when engine='c' (GH #9832) --- doc/source/whatsnew/v0.16.1.txt | 4 ++++ pandas/io/tests/test_parsers.py | 22 ++++++++++++++++++++++ pandas/src/parser/tokenizer.c | 18 ++++++------------ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 53ca4c0c306e1..ec8a43bc1b0a2 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -102,4 +102,8 @@ Bug Fixes ======= - Fixed bug where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (:issue:`9671`) +<<<<<<< HEAD >>>>>>> f00d6bb... Fixed bug #9671 where 'DataFrame.plot()' raised an error when both 'color' and 'style' keywords were passed and there was no color symbol in the style strings (this should be allowed) +======= +- Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`) +>>>>>>> e67893f... BUG: skiprows doesn't handle blank lines properly when engine='c' (GH #9832) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 1bf8f7fef7b6b..e65f3ffd7c5a5 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -840,6 +840,28 @@ def test_deep_skiprows(self): condensed_data = self.read_csv(StringIO(condensed_text)) tm.assert_frame_equal(data, condensed_data) + def test_skiprows_blank(self): + # GH 9832 + text = """#foo,a,b,c +#foo,a,b,c + +#foo,a,b,c +#foo,a,b,c + +1/1/2000,1.,2.,3. 
+1/2/2000,4,5,6 +1/3/2000,7,8,9 +""" + data = self.read_csv(StringIO(text), skiprows=6, header=None, + index_col=0, parse_dates=True) + + expected = DataFrame(np.arange(1., 10.).reshape((3, 3)), + columns=[1, 2, 3], + index=[datetime(2000, 1, 1), datetime(2000, 1, 2), + datetime(2000, 1, 3)]) + expected.index.name = 0 + tm.assert_frame_equal(data, expected) + def test_detect_string_na(self): data = """A,B foo,bar diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index 975142ebacc2a..1bc4096658b29 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -757,11 +757,9 @@ int tokenize_delimited(parser_t *self, size_t line_limit) case START_RECORD: // start of record if (skip_this_line(self, self->file_lines)) { + self->state = SKIP_LINE; if (c == '\n') { - END_LINE() - } - else { - self->state = SKIP_LINE; + END_LINE(); } break; } @@ -1093,11 +1091,9 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit) case START_RECORD: // start of record if (skip_this_line(self, self->file_lines)) { + self->state = SKIP_LINE; if (c == self->lineterminator) { - END_LINE() - } - else { - self->state = SKIP_LINE; + END_LINE(); } break; } @@ -1391,11 +1387,9 @@ int tokenize_whitespace(parser_t *self, size_t line_limit) case START_RECORD: // start of record if (skip_this_line(self, self->file_lines)) { + self->state = SKIP_LINE; if (c == '\n') { - END_LINE() - } - else { - self->state = SKIP_LINE; + END_LINE(); } break; } else if (c == '\n') { From 94fec1ec213a8e88401766022ffefb0ba60d0bb6 Mon Sep 17 00:00:00 2001 From: lucas Date: Wed, 8 Apr 2015 21:47:13 +0800 Subject: [PATCH 033/239] BUG: Issue 9798 fixed BUG: #9798 `index_col` shouldn't accept the value `True` move the check to ``TextFileReader`` ``clean_options`` --- doc/source/whatsnew/v0.16.1.txt | 5 +++++ pandas/io/parsers.py | 2 ++ pandas/io/tests/test_parsers.py | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index ec8a43bc1b0a2..c802e63e962f1 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -106,4 +106,9 @@ Bug Fixes >>>>>>> f00d6bb... Fixed bug #9671 where 'DataFrame.plot()' raised an error when both 'color' and 'style' keywords were passed and there was no color symbol in the style strings (this should be allowed) ======= - Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`) +<<<<<<< HEAD >>>>>>> e67893f... BUG: skiprows doesn't handle blank lines properly when engine='c' (GH #9832) +======= + +- Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`) +>>>>>>> 53f2ea4... 
BUG: Issue 9798 fixed diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 99fb24ebf91dd..45a85bb63f12c 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -652,6 +652,8 @@ def _clean_options(self, options, engine): # really delete this one keep_default_na = result.pop('keep_default_na') + if index_col is True: + raise ValueError("The value of index_col couldn't be 'True'") if _is_index_col(index_col): if not isinstance(index_col, (list, tuple, np.ndarray)): index_col = [index_col] diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index e65f3ffd7c5a5..33579d2d64b29 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -521,6 +521,11 @@ def test_usecols_index_col_False(self): df = self.read_csv(StringIO(s_malformed), usecols=cols, index_col=False) tm.assert_frame_equal(expected, df) + def test_index_col_is_True(self): + # Issue 9798 + self.assertRaises(ValueError, self.read_csv, StringIO(self.ts_data), + index_col=True) + def test_converter_index_col_bug(self): # 1835 data = "A;B\n1;2\n3;4" From b92c341d877ae4a10d43b4ff108d83396a7ed741 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 9 Apr 2015 22:39:39 +0900 Subject: [PATCH 034/239] DOC: Fix release note for v0.16 --- doc/source/whatsnew/v0.16.0.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index aa35434802799..f9bef3d9c7f4a 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -474,10 +474,11 @@ Other API Changes - ``Series.values_counts`` and ``Series.describe`` for categorical data will now put ``NaN`` entries at the end. (:issue:`9443`) - ``Series.describe`` for categorical data will now give counts and frequencies of 0, not ``NaN``, for unused categories (:issue:`9443`) -- Due to a bug fix, looking up a partial string label with ``DatetimeIndex.asof`` now includes values that match the string, even if they are after the start of the partial string label (:issue:`9258`). Old behavior: +- Due to a bug fix, looking up a partial string label with ``DatetimeIndex.asof`` now includes values that match the string, even if they are after the start of the partial string label (:issue:`9258`). - .. ipython:: python - :verbatim: + Old behavior: + + .. 
code-block:: python In [4]: pd.to_datetime(['2000-01-31', '2000-02-28']).asof('2000-02') Out[4]: Timestamp('2000-01-31 00:00:00') From 669cb13766dab250deed2ba4260f755bc90070d0 Mon Sep 17 00:00:00 2001 From: David Stephens Date: Tue, 7 Apr 2015 21:43:29 -0700 Subject: [PATCH 035/239] TST: Fix tests in TestGoogle --- pandas/io/tests/test_data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index 70a25a45c0ad4..9b27d612cdeee 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -33,7 +33,7 @@ def assert_n_failed_equals_n_null_columns(wngs, obj, cls=SymbolWarning): all_nan_cols = pd.Series(dict((k, pd.isnull(v).all()) for k, v in compat.iteritems(obj))) n_all_nan_cols = all_nan_cols.sum() - valid_warnings = pd.Series([wng for wng in wngs if isinstance(wng, cls)]) + valid_warnings = pd.Series([wng for wng in wngs if wng.category == cls]) assert_equal(len(valid_warnings), n_all_nan_cols) failed_symbols = all_nan_cols[all_nan_cols].index msgs = valid_warnings.map(lambda x: x.message) @@ -79,7 +79,7 @@ def test_get_goog_volume(self): for locale in self.locales: with tm.set_locale(locale): df = web.get_data_google('GOOG').sort_index() - self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473) + self.assertEqual(df.Volume.ix['JAN-02-2015'], 1446662) @network def test_get_multi1(self): @@ -87,10 +87,10 @@ def test_get_multi1(self): sl = ['AAPL', 'AMZN', 'GOOG'] with tm.set_locale(locale): pan = web.get_data_google(sl, '2012') - ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] + ts = pan.Close.GOOG.index[pan.Close.AAPL < pan.Close.GOOG] if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and hasattr(pan.Close, 'AAPL')): - self.assertEqual(ts[0].dayofyear, 96) + self.assertEqual(ts[0].dayofyear, 3) else: self.assertRaises(AttributeError, lambda: pan.Close) @@ -135,7 +135,7 @@ def test_dtypes(self): def test_unicode_date(self): #GH8967 data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13') - self.assertEquals(data.index.name, 'Date') + self.assertEqual(data.index.name, 'Date') class TestYahoo(tm.TestCase): From d6a4ee60553fe7f9652ffe8be6f08c701cb81dfa Mon Sep 17 00:00:00 2001 From: Evan Wright Date: Tue, 31 Mar 2015 07:09:43 -0400 Subject: [PATCH 036/239] ENH: Allow conversion of datetime64 and timedelta64 to string in astype (GH 9757) --- doc/source/whatsnew/v0.16.1.txt | 1 + pandas/core/common.py | 12 ++++++------ pandas/tests/test_frame.py | 27 +++++++++++++++++++++++++++ pandas/tests/test_series.py | 13 +++++++++++++ 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index c802e63e962f1..5fb6eece53b51 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -30,6 +30,7 @@ Enhancements df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) df.drop(['A', 'X'], axis=1, errors='ignore') +- Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`) .. 
_whatsnew_0161.api: diff --git a/pandas/core/common.py b/pandas/core/common.py index ec805aba34d48..0fb35c2fb02fc 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2637,7 +2637,12 @@ def _astype_nansafe(arr, dtype, copy=True): if not isinstance(dtype, np.dtype): dtype = _coerce_to_dtype(dtype) - if is_datetime64_dtype(arr): + if issubclass(dtype.type, compat.text_type): + # in Py3 that's str, in Py2 that's unicode + return lib.astype_unicode(arr.ravel()).reshape(arr.shape) + elif issubclass(dtype.type, compat.string_types): + return lib.astype_str(arr.ravel()).reshape(arr.shape) + elif is_datetime64_dtype(arr): if dtype == object: return tslib.ints_to_pydatetime(arr.view(np.int64)) elif dtype == np.int64: @@ -2675,11 +2680,6 @@ def _astype_nansafe(arr, dtype, copy=True): elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) - elif issubclass(dtype.type, compat.text_type): - # in Py3 that's str, in Py2 that's unicode - return lib.astype_unicode(arr.ravel()).reshape(arr.shape) - elif issubclass(dtype.type, compat.string_types): - return lib.astype_str(arr.ravel()).reshape(arr.shape) if copy: return arr.astype(dtype) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b8bdd2d4e3b40..94d3ed72ee427 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4192,6 +4192,33 @@ def test_astype_cast_nan_int(self): df = DataFrame(data={"Values": [1.0, 2.0, 3.0, np.nan]}) self.assertRaises(ValueError, df.astype, np.int64) + def test_astype_str(self): + # GH9757 + dts = Series(date_range('2010-01-04', periods=5)) + tds = Series([Timedelta(x, unit='d') for x in range(5)]) + ns = Series(range(5)) + fs = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + + df = DataFrame({ + 'dts' : dts.values, + 'tds' : tds.values, + 'ns' : ns.values, + 'fs' : fs.values, + }) + + # Test str and unicode on python 2.x and just str on python 3.x + for tt in set([str, compat.text_type]): + result = df.astype(tt) + + expected = DataFrame({ + 'dts' : list(map(tt, dts.values)), + 'tds' : list(map(tt, tds.values)), + 'ns' : list(map(tt, ns.values)), + 'fs' : list(map(tt, fs.values)), + }) + + assert_frame_equal(result, expected) + def test_array_interface(self): result = np.sqrt(self.frame) tm.assert_isinstance(result, type(self.frame)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f044fe540ea24..1c64b0d60d23f 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5511,6 +5511,19 @@ def test_astype_str(self): expec = s.map(compat.text_type) assert_series_equal(res, expec) + # GH9757 + # Test str and unicode on python 2.x and just str on python 3.x + for tt in set([str, compat.text_type]): + ts = Series([Timestamp('2010-01-04 00:00:00')]) + s = ts.astype(tt) + expected = Series([tt(ts.values[0])]) + assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit='d')]) + s = td.astype(tt) + expected = Series([tt(td.values[0])]) + assert_series_equal(s, expected) + def test_astype_unicode(self): # GH7758 From 64a9ef357e28c68407c3e3fc06ff1d06b932d3c9 Mon Sep 17 00:00:00 2001 From: Evan Wright Date: Thu, 9 Apr 2015 23:53:39 -0400 Subject: [PATCH 037/239] Add tests with time zones, and clean up DataFrame test --- pandas/tests/test_frame.py | 25 +++++++++++-------------- pandas/tests/test_series.py | 5 +++++ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pandas/tests/test_frame.py 
b/pandas/tests/test_frame.py index 94d3ed72ee427..6ea76710b4de7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4194,27 +4194,24 @@ def test_astype_cast_nan_int(self): def test_astype_str(self): # GH9757 - dts = Series(date_range('2010-01-04', periods=5)) - tds = Series([Timedelta(x, unit='d') for x in range(5)]) - ns = Series(range(5)) - fs = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + a = Series(date_range('2010-01-04', periods=5)) + b = Series(date_range('3/6/2012 00:00', periods=5, tz='US/Eastern')) + c = Series([Timedelta(x, unit='d') for x in range(5)]) + d = Series(range(5)) + e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) - df = DataFrame({ - 'dts' : dts.values, - 'tds' : tds.values, - 'ns' : ns.values, - 'fs' : fs.values, - }) + df = DataFrame({'a' : a, 'b' : b, 'c' : c, 'd' : d, 'e' : e}) # Test str and unicode on python 2.x and just str on python 3.x for tt in set([str, compat.text_type]): result = df.astype(tt) expected = DataFrame({ - 'dts' : list(map(tt, dts.values)), - 'tds' : list(map(tt, tds.values)), - 'ns' : list(map(tt, ns.values)), - 'fs' : list(map(tt, fs.values)), + 'a' : list(map(tt, a.values)), + 'b' : list(map(tt, b.values)), + 'c' : list(map(tt, c.values)), + 'd' : list(map(tt, d.values)), + 'e' : list(map(tt, e.values)), }) assert_frame_equal(result, expected) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 1c64b0d60d23f..fec98a37b5017 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5519,6 +5519,11 @@ def test_astype_str(self): expected = Series([tt(ts.values[0])]) assert_series_equal(s, expected) + ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) + s = ts.astype(tt) + expected = Series([tt(ts.values[0])]) + assert_series_equal(s, expected) + td = Series([Timedelta(1, unit='d')]) s = td.astype(tt) expected = Series([tt(td.values[0])]) From b8061901faf49d400145817e744bfc7b391a5163 Mon Sep 17 00:00:00 2001 From: Mortada Mehyar Date: Mon, 16 Mar 2015 20:03:45 -0700 Subject: [PATCH 038/239] ENH: add StringMethods (.str accessor) to Index, fixes #9068 --- doc/source/text.rst | 11 +++++++++-- doc/source/whatsnew/v0.16.1.txt | 19 +++++++++++++++++- pandas/core/base.py | 19 ++++++++++++++++++ pandas/core/series.py | 16 ---------------- pandas/core/strings.py | 28 +++++++++++++++++++-------- pandas/tests/test_index.py | 34 +++++++++++++++++++++++++++++++++ pandas/tests/test_series.py | 13 +++++++++++++ 7 files changed, 113 insertions(+), 27 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index a98153e277fae..ee91ea3c166b6 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -17,10 +17,10 @@ Working with Text Data .. _text.string_methods: -Series is equipped with a set of string processing methods +Series and Index are equipped with a set of string processing methods that make it easy to operate on each element of the array. Perhaps most importantly, these methods exclude missing/NA values automatically. These are -accessed via the Series's ``str`` attribute and generally have names matching +accessed via the ``str`` attribute and generally have names matching the equivalent (scalar) built-in string methods: .. ipython:: python @@ -30,6 +30,13 @@ the equivalent (scalar) built-in string methods: s.str.upper() s.str.len() +.. 
ipython:: python + + idx = Index([' jack', 'jill ', ' jesse ', 'frank']) + idx.str.strip() + idx.str.lstrip() + idx.str.rstrip() + Splitting and Replacing Strings ------------------------------- diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 5fb6eece53b51..382c78c496009 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -18,11 +18,28 @@ Enhancements ~~~~~~~~~~~~ - Added ``StringMethods.capitalize()`` and ``swapcase`` which behave as the same as standard ``str`` (:issue:`9766`) +- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`) -- ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`) + The `.str` accessor is now available for both `Series` and `Index`. + + .. ipython:: python + idx = Index([' jack', 'jill ', ' jesse ', 'frank']) + idx.str.strip() + One special case for the `.str` accessor on `Index` is that if a string method returns `bool`, the `.str` accessor + will return a `np.array` instead of a boolean `Index` (:issue:`8875`). This enables the following expression + to work naturally: + .. ipython:: python + + idx = Index(['a1', 'a2', 'b1', 'b2']) + s = Series(range(4), index=idx) + s + idx.str.startswith('a') + s[s.index.str.startswith('a')] + +- ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`) - ``drop`` function can now accept ``errors`` keyword to suppress ValueError raised when any of label does not exist in the target data. (:issue:`6736`) .. ipython:: python diff --git a/pandas/core/base.py b/pandas/core/base.py index dde2e74132c4b..a3d3c3791e20c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -10,6 +10,7 @@ import pandas.tslib as tslib import pandas.lib as lib from pandas.util.decorators import Appender, cache_readonly +from pandas.core.strings import StringMethods _shared_docs = dict() @@ -497,6 +498,24 @@ def searchsorted(self, key, side='left'): #### needs tests/doc-string return self.values.searchsorted(key, side=side) + # string methods + def _make_str_accessor(self): + from pandas.core.series import Series + from pandas.core.index import Index + if isinstance(self, Series) and not com.is_object_dtype(self.dtype): + # this really should exclude all series with any non-string values, + # but that isn't practical for performance reasons until we have a + # str dtype (GH 9343) + raise AttributeError("Can only use .str accessor with string " + "values, which use np.object_ dtype in " + "pandas") + elif isinstance(self, Index) and self.inferred_type != 'string': + raise AttributeError("Can only use .str accessor with string " + "values (i.e. 
inferred_type is 'string')") + return StringMethods(self) + + str = AccessorProperty(StringMethods, _make_str_accessor) + _shared_docs['drop_duplicates'] = ( """Return %(klass)s with duplicate values removed diff --git a/pandas/core/series.py b/pandas/core/series.py index 68f3a6032402f..b71c269468d62 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,7 +28,6 @@ from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor -from pandas.core.strings import StringMethods from pandas.tseries.common import (maybe_to_datetimelike, CombinedDatetimelikeProperties) from pandas.tseries.index import DatetimeIndex @@ -2494,21 +2493,6 @@ def to_period(self, freq=None, copy=True): return self._constructor(new_values, index=new_index).__finalize__(self) - #------------------------------------------------------------------------------ - # string methods - - def _make_str_accessor(self): - if not com.is_object_dtype(self.dtype): - # this really should exclude all series with any non-string values, - # but that isn't practical for performance reasons until we have a - # str dtype (GH 9343) - raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - return StringMethods(self) - - str = base.AccessorProperty(StringMethods, _make_str_accessor) - #------------------------------------------------------------------------------ # Datetimelike delegation methods diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 4ef341c481a60..6d20907373014 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,7 +1,7 @@ import numpy as np from pandas.compat import zip -from pandas.core.common import isnull, _values_from_object +from pandas.core.common import isnull, _values_from_object, is_bool_dtype import pandas.compat as compat from pandas.util.decorators import Appender import re @@ -632,9 +632,10 @@ def str_split(arr, pat=None, n=None, return_type='series'): pat : string, default None String or regular expression to split on. If None, splits on whitespace n : int, default None (all) - return_type : {'series', 'frame'}, default 'series + return_type : {'series', 'index', 'frame'}, default 'series' If frame, returns a DataFrame (elements are strings) - If series, returns an Series (elements are lists of strings). + If series or index, returns the same type as the original object + (elements are lists of strings). Notes ----- @@ -646,9 +647,13 @@ def str_split(arr, pat=None, n=None, return_type='series'): """ from pandas.core.series import Series from pandas.core.frame import DataFrame + from pandas.core.index import Index - if return_type not in ('series', 'frame'): - raise ValueError("return_type must be {'series', 'frame'}") + if return_type not in ('series', 'index', 'frame'): + raise ValueError("return_type must be {'series', 'index', 'frame'}") + if return_type == 'frame' and isinstance(arr, Index): + raise ValueError("return_type='frame' is not supported for string " + "methods on Index") if pat is None: if n is None or n == 0: n = -1 @@ -928,9 +933,9 @@ def do_copy(target): class StringMethods(object): """ - Vectorized string functions for Series. NAs stay NA unless handled - otherwise by a particular method. Patterned after Python's string methods, - with some inspiration from R's stringr package. + Vectorized string functions for Series and Index. NAs stay NA unless + handled otherwise by a particular method. 
Patterned after Python's string + methods, with some inspiration from R's stringr package. Examples -------- @@ -959,11 +964,18 @@ def __iter__(self): def _wrap_result(self, result): from pandas.core.series import Series from pandas.core.frame import DataFrame + from pandas.core.index import Index if not hasattr(result, 'ndim'): return result elif result.ndim == 1: name = getattr(result, 'name', None) + if isinstance(self.series, Index): + # if result is a boolean np.array, return the np.array + # instead of wrapping it into a boolean Index (GH 8875) + if is_bool_dtype(result): + return result + return Index(result, name=name or self.series.name) return Series(result, index=self.series.index, name=name or self.series.name) else: diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 61cb337880c00..bb75b12754dca 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1197,6 +1197,40 @@ def test_join_self(self): for kind in kinds: joined = res.join(res, how=kind) self.assertIs(res, joined) + def test_str_attribute(self): + # GH9068 + methods = ['strip', 'rstrip', 'lstrip'] + idx = Index([' jack', 'jill ', ' jesse ', 'frank']) + for method in methods: + expected = Index([getattr(str, method)(x) for x in idx.values]) + tm.assert_index_equal(getattr(Index.str, method)(idx.str), expected) + + # create a few instances that are not able to use .str accessor + indices = [Index(range(5)), + tm.makeDateIndex(10), + MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), + PeriodIndex(start='2000', end='2010', freq='A')] + for idx in indices: + with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'): + idx.str.repeat(2) + + idx = Index(['a b c', 'd e', 'f']) + expected = Index([['a', 'b', 'c'], ['d', 'e'], ['f']]) + tm.assert_index_equal(idx.str.split(), expected) + tm.assert_index_equal(idx.str.split(return_type='series'), expected) + # return_type 'index' is an alias for 'series' + tm.assert_index_equal(idx.str.split(return_type='index'), expected) + with self.assertRaisesRegexp(ValueError, 'not supported'): + idx.str.split(return_type='frame') + + # test boolean case, should return np.array instead of boolean Index + idx = Index(['a1', 'a2', 'b1', 'b2']) + expected = np.array([True, True, False, False]) + self.assert_array_equal(idx.str.startswith('a'), expected) + self.assertIsInstance(idx.str.startswith('a'), np.ndarray) + s = Series(range(4), index=idx) + expected = Series(range(2), index=['a1', 'a2']) + tm.assert_series_equal(s[s.index.str.startswith('a')], expected) def test_indexing_doesnt_change_class(self): idx = Index([1, 2, 3, 'a', 'b', 'c']) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index fec98a37b5017..70a6e2541692a 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4933,6 +4933,19 @@ def test_to_csv_path_is_none(self): csv_str = s.to_csv(path=None) self.assertIsInstance(csv_str, str) + def test_str_attribute(self): + # GH9068 + methods = ['strip', 'rstrip', 'lstrip'] + s = Series([' jack', 'jill ', ' jesse ', 'frank']) + for method in methods: + expected = Series([getattr(str, method)(x) for x in s.values]) + assert_series_equal(getattr(Series.str, method)(s.str), expected) + + # str accessor only valid with string values + s = Series(range(5)) + with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'): + s.str.repeat(2) + def test_clip(self): val = self.ts.median() From 44861eb4367ad3a24d61a5628b0879b7c5d55e72 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 11 
Apr 2015 17:39:46 +0900 Subject: [PATCH 039/239] BUG: plot(kind=hist) results in TypeError for non-numeric data --- doc/source/whatsnew/v0.16.1.txt | 1 + pandas/tests/test_graphics.py | 12 ++++++++++++ pandas/tools/plotting.py | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 382c78c496009..2fa02d70b6d4b 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -107,6 +107,7 @@ Bug Fixes - Bug in plotting continuously using ``secondary_y`` may not show legend properly. (:issue:`9610`, :issue:`9779`) +- Bug in ``DataFrame.plot(kind="hist")`` results in ``TypeError`` when ``DataFrame`` contains non-numeric columns (:issue:`9853`) - Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) - Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 36c19cd39f76c..7d489ce66c288 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -678,6 +678,18 @@ def test_hist_df_kwargs(self): ax = df.plot(kind='hist', bins=5) self.assertEqual(len(ax.patches), 10) + @slow + def test_hist_df_with_nonnumerics(self): + # GH 9853 + with tm.RNGContext(1): + df = DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) + df['E'] = ['x', 'y'] * 5 + ax = df.plot(kind='hist', bins=5) + self.assertEqual(len(ax.patches), 20) + + ax = df.plot(kind='hist') # bins=10 + self.assertEqual(len(ax.patches), 40) + @slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 358c5b0dd5940..1accc48b0d3c4 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1948,7 +1948,8 @@ def __init__(self, data, bins=10, bottom=0, **kwargs): def _args_adjust(self): if com.is_integer(self.bins): # create common bin edge - values = np.ravel(self.data.values) + values = self.data.convert_objects()._get_numeric_data() + values = np.ravel(values) values = values[~com.isnull(values)] hist, self.bins = np.histogram(values, bins=self.bins, From 67ac08a66da96ab9778e505e1d546a40e56bc79f Mon Sep 17 00:00:00 2001 From: Artemy Kolchinsky Date: Wed, 15 Oct 2014 14:39:51 -0400 Subject: [PATCH 040/239] Fix to allow sparse dataframes to have nan column labels Support for nan columns Fix Trigger Travis CI jreback fixes Release note update --- doc/source/whatsnew/v0.16.1.txt | 6 ++++++ pandas/sparse/frame.py | 18 +++++++++--------- pandas/sparse/tests/test_sparse.py | 6 ++++++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 2fa02d70b6d4b..d22542d01fb0f 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -130,4 +130,10 @@ Bug Fixes ======= - Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`) +<<<<<<< HEAD >>>>>>> 53f2ea4... BUG: Issue 9798 fixed +======= + +- Bug in which ``SparseDataFrame`` could not take `nan` as a column name (:issue:`8822`) + +>>>>>>> 7879205... 
Fix to allow sparse dataframes to have nan column labels diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 30b06c8a93142..bc022fcb6542b 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -100,7 +100,7 @@ def __init__(self, data=None, index=None, columns=None, mgr = self._init_mgr( data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) elif data is None: - data = {} + data = DataFrame() if index is None: index = Index([]) @@ -115,7 +115,7 @@ def __init__(self, data=None, index=None, columns=None, index=index, kind=self._default_kind, fill_value=self._default_fill_value) - mgr = dict_to_manager(data, columns, index) + mgr = df_to_manager(data, columns, index) if dtype is not None: mgr = mgr.astype(dtype) @@ -155,7 +155,7 @@ def _init_dict(self, data, index, columns, dtype=None): kind=self._default_kind, fill_value=self._default_fill_value, copy=True) - sdict = {} + sdict = DataFrame() for k, v in compat.iteritems(data): if isinstance(v, Series): # Force alignment, no copy necessary @@ -181,7 +181,7 @@ def _init_dict(self, data, index, columns, dtype=None): if c not in sdict: sdict[c] = sp_maker(nan_vec) - return dict_to_manager(sdict, columns, index) + return df_to_manager(sdict, columns, index) def _init_matrix(self, data, index, columns, dtype=None): data = _prep_ndarray(data, copy=False) @@ -228,12 +228,12 @@ def _unpickle_sparse_frame_compat(self, state): else: index = idx - series_dict = {} + series_dict = DataFrame() for col, (sp_index, sp_values) in compat.iteritems(series): series_dict[col] = SparseSeries(sp_values, sparse_index=sp_index, fill_value=fv) - self._data = dict_to_manager(series_dict, columns, index) + self._data = df_to_manager(series_dict, columns, index) self._default_fill_value = fv self._default_kind = kind @@ -737,13 +737,13 @@ def applymap(self, func): """ return self.apply(lambda x: lmap(func, x)) -def dict_to_manager(sdict, columns, index): - """ create and return the block manager from a dict of series, columns, index """ +def df_to_manager(sdf, columns, index): + """ create and return the block manager from a dataframe of series, columns, index """ # from BlockManager perspective axes = [_ensure_index(columns), _ensure_index(index)] - return create_block_manager_from_arrays([sdict[c] for c in columns], columns, axes) + return create_block_manager_from_arrays([sdf[c] for c in columns], columns, axes) def stack_sparse_frame(frame): diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index f187e7f883e11..454cbcd5320e9 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1663,6 +1663,12 @@ def test_as_blocks(self): self.assertEqual(list(df_blocks.keys()), ['float64']) assert_frame_equal(df_blocks['float64'], df) + def test_nan_columnname(self): + # GH 8822 + nan_colname = DataFrame(Series(1.0,index=[0]),columns=[nan]) + nan_colname_sparse = nan_colname.to_sparse() + self.assertTrue(np.isnan(nan_colname_sparse.columns[0])) + def _dense_series_compare(s, f): result = f(s) From 8d495e597492e9f00c09d169596e7c6e0b9384b6 Mon Sep 17 00:00:00 2001 From: Jan Schulz Date: Fri, 10 Apr 2015 16:28:50 +0200 Subject: [PATCH 041/239] Fix: unequal comparisons of categorical and scalar Before, unequal comparisons were not checking the order of the categories. This was due to a conversion to an ndarray, which turned the comparison to one between ndarray and scalar, which of course has no categories to take into account. 
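For illustration, a minimal sketch of the corrected behaviour (mirroring the
new tests below):

.. code-block:: python

    import pandas as pd

    s = pd.Series(pd.Categorical(list("abc"), categories=list("cba"), ordered=True))
    # with categories ordered c < b < a, only "a" compares greater than "b",
    # so the result follows the category order, not lexicographic order
    s > "b"  # [True, False, False]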
Also add test cases and remove the one which actually tested the wrong behaviour.
---
 doc/source/whatsnew/v0.16.1.txt  |  4 ++++
 pandas/core/ops.py               | 28 +++++++++++++++----------
 pandas/tests/test_categorical.py | 35 +++++++++++++++++++++++++++++---
 3 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index d22542d01fb0f..f67d4a88881c9 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -136,4 +136,8 @@ Bug Fixes

 - Bug in which ``SparseDataFrame`` could not take `nan` as a column name (:issue:`8822`)

<<<<<<< HEAD
>>>>>>> 7879205... Fix to allow sparse dataframes to have nan column labels
=======
+- Bug in unequal comparisons between a ``Series`` of dtype ``"category"`` and a scalar (e.g. ``Series(Categorical(list("abc"), categories=list("cba"), ordered=True)) > "b"``), which wouldn't use the order of the categories but the lexicographical order (:issue:`9848`)
>>>>>>> f0ac930... Fix: unequal comparisons of categorical and scalar

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 954d2c8a77326..2af9cd43faaef 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -594,20 +594,26 @@ def wrapper(self, other):

         mask = isnull(self)

-        values = self.get_values()
-        other = _index.convert_scalar(values,_values_from_object(other))
+        if com.is_categorical_dtype(self):
+            # cats are a special case as get_values() would return an ndarray, which would then
+            # not take categories ordering into account
+            # we can go directly to op, as the na_op would just test again and dispatch to it.
+            res = op(self.values, other)
+        else:
+            values = self.get_values()
+            other = _index.convert_scalar(values,_values_from_object(other))

-        if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
-            values = values.view('i8')
+            if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
+                values = values.view('i8')

-        # scalars
-        res = na_op(values, other)
-        if np.isscalar(res):
-            raise TypeError('Could not compare %s type with Series'
-                            % type(other))
+            # scalars
+            res = na_op(values, other)
+            if np.isscalar(res):
+                raise TypeError('Could not compare %s type with Series'
+                                % type(other))

-        # always return a full value series here
-        res = _values_from_object(res)
+            # always return a full value series here
+            res = _values_from_object(res)

         res = pd.Series(res, index=self.index, name=self.name,
                         dtype='bool')

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 7f4b3fcb94dfa..4c5678bf6633f 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -114,6 +114,9 @@ def f():
             Categorical([1,2], [1,2,np.nan, np.nan])
         self.assertRaises(ValueError, f)

+        # The default should be unordered
+        c1 = Categorical(["a", "b", "c", "a"])
+        self.assertFalse(c1.ordered)

         # Categorical as input
         c1 = Categorical(["a", "b", "c", "a"])
@@ -367,6 +370,13 @@ def f():
         self.assertRaises(TypeError, lambda: a < cat)
         self.assertRaises(TypeError, lambda: a < cat_rev)

+        # Make sure that unequal comparisons take the categories order into account
+        cat_rev = pd.Categorical(list("abc"), categories=list("cba"), ordered=True)
+        exp = np.array([True, False, False])
+        res = cat_rev > "b"
+        self.assert_numpy_array_equal(res, exp)
+
+
     def test_na_flags_int_categories(self):
         # #1457

@@ -2390,6 +2400,18 @@ def test_comparisons(self):
         exp = Series([False, False, True])
         tm.assert_series_equal(res, exp)

+        scalar = base[1]
+        res = cat > scalar
+        exp = Series([False, False, True])
+ exp2 = cat.values > scalar + tm.assert_series_equal(res, exp) + tm.assert_numpy_array_equal(res.values, exp2) + res_rev = cat_rev > scalar + exp_rev = Series([True, False, False]) + exp_rev2 = cat_rev.values > scalar + tm.assert_series_equal(res_rev, exp_rev) + tm.assert_numpy_array_equal(res_rev.values, exp_rev2) + # Only categories with same categories can be compared def f(): cat > cat_rev @@ -2408,9 +2430,16 @@ def f(): self.assertRaises(TypeError, lambda: a < cat) self.assertRaises(TypeError, lambda: a < cat_rev) - # Categoricals can be compared to scalar values - res = cat_rev > base[0] - tm.assert_series_equal(res, exp) + # unequal comparison should raise for unordered cats + cat = Series(Categorical(list("abc"))) + def f(): + cat > "b" + self.assertRaises(TypeError, f) + cat = Series(Categorical(list("abc"), ordered=False)) + def f(): + cat > "b" + self.assertRaises(TypeError, f) + # And test NaN handling... cat = Series(Categorical(["a","b","c", np.nan])) From abfdeda67b09c41abdc01dd957669bd7973d9fa0 Mon Sep 17 00:00:00 2001 From: Mortada Mehyar Date: Sat, 11 Apr 2015 11:47:02 -0700 Subject: [PATCH 042/239] DOC: add more examples to StringMethods on Index --- doc/source/text.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/source/text.rst b/doc/source/text.rst index ee91ea3c166b6..f417f56f51fbc 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -37,6 +37,32 @@ the equivalent (scalar) built-in string methods: idx.str.lstrip() idx.str.rstrip() +The string methods on Index are especially useful for cleaning up or +transforming DataFrame columns. For instance, you may have columns with +leading or trailing whitespace: + +.. ipython:: python + + df = DataFrame(randn(3, 2), columns=[' Column A ', ' Column B '], + index=range(3)) + df + +Since ``df.columns`` is an Index object, we can use the ``.str`` accessor + +.. ipython:: python + + df.columns.str.strip() + df.columns.str.lower() + +These string methods can then be used to clean up the columns as needed. +Here we are removing leading and trailing whitespaces, lowercasing all names, +and replacing any remaining whitespaces with underscores: + +.. ipython:: python + + df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_') + df + Splitting and Replacing Strings ------------------------------- From a835b5ad77bea0d2839296f13b986a12346e8acb Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Thu, 9 Apr 2015 18:59:02 -0400 Subject: [PATCH 043/239] memory access bug in read_csv causing segfault --- doc/source/whatsnew/v0.16.1.txt | 1 + pandas/io/tests/test_cparser.py | 22 +++++++++ pandas/parser.pyx | 79 +++++++++++---------------------- pandas/src/parser/tokenizer.c | 30 ++++++------- pandas/src/parser/tokenizer.h | 9 ++-- 5 files changed, 69 insertions(+), 72 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index f67d4a88881c9..0062eb421890f 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -91,6 +91,7 @@ Bug Fixes - Fixed bug (:issue:`9542`) where labels did not appear properly in legend of ``DataFrame.plot()``. Passing ``label=`` args also now works, and series indices are no longer mutated. - Bug in json serialization when frame has length zero.(:issue:`9805`) +- Bug in `read_csv` where missing trailing delimiters would cause segfault. 
(:issue:`5664`) - Bug in ``scatter_matrix`` draws unexpected axis ticklabels (:issue:`5662`) diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index ad6f071d738ff..93d55c654de90 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -336,6 +336,28 @@ def test_empty_field_eof(self): 2: np.array(['3', ''], dtype=object)} assert_array_dicts_equal(result, expected) + # GH5664 + a = DataFrame([['b'], [nan]], columns=['a'], index=['a', 'c']) + b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], + columns=list('abcd'), + index=[1, 1]) + c = DataFrame([[1, 2, 3, 4], [6, nan, nan, nan], + [8, 9, 10, 11], [13, 14, nan, nan]], + columns=list('abcd'), + index=[0, 5, 7, 12]) + + for _ in range(100): + df = read_csv(StringIO('a,b\nc\n'), skiprows=0, + names=['a'], engine='c') + assert_frame_equal(df, a) + + df = read_csv(StringIO('1,1,1,1,0\n'*2 + '\n'*2), + names=list("abcd"), engine='c') + assert_frame_equal(df, b) + + df = read_csv(StringIO('0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14'), + names=list('abcd'), engine='c') + assert_frame_equal(df, c) def assert_array_dicts_equal(left, right): for k, v in compat.iteritems(left): diff --git a/pandas/parser.pyx b/pandas/parser.pyx index d13781d6fa132..73a03fc5cef7c 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -175,7 +175,7 @@ cdef extern from "parser/tokenizer.h": int col void coliter_setup(coliter_t *it, parser_t *parser, int i, int start) - char* COLITER_NEXT(coliter_t it) + void COLITER_NEXT(coliter_t, const char *) parser_t* parser_new() @@ -212,7 +212,7 @@ cdef extern from "parser/tokenizer.h": inline int to_longlong(char *item, long long *p_value) # inline int to_longlong_thousands(char *item, long long *p_value, # char tsep) - int to_boolean(char *item, uint8_t *val) + int to_boolean(const char *item, uint8_t *val) cdef extern from "parser/io.h": @@ -1279,7 +1279,7 @@ cdef _string_box_factorize(parser_t *parser, int col, Py_ssize_t i size_t lines coliter_t it - char *word + const char *word = NULL ndarray[object] result int ret = 0 @@ -1296,7 +1296,7 @@ cdef _string_box_factorize(parser_t *parser, int col, coliter_setup(&it, parser, col, line_start) for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) if na_filter: k = kh_get_str(na_hashset, word) @@ -1333,7 +1333,7 @@ cdef _string_box_utf8(parser_t *parser, int col, Py_ssize_t i size_t lines coliter_t it - char *word + const char *word = NULL ndarray[object] result int ret = 0 @@ -1350,7 +1350,7 @@ cdef _string_box_utf8(parser_t *parser, int col, coliter_setup(&it, parser, col, line_start) for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) if na_filter: k = kh_get_str(na_hashset, word) @@ -1388,7 +1388,7 @@ cdef _string_box_decode(parser_t *parser, int col, Py_ssize_t i, size size_t lines coliter_t it - char *word + const char *word = NULL ndarray[object] result int ret = 0 @@ -1407,7 +1407,7 @@ cdef _string_box_decode(parser_t *parser, int col, coliter_setup(&it, parser, col, line_start) for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) if na_filter: k = kh_get_str(na_hashset, word) @@ -1444,7 +1444,7 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start, int error Py_ssize_t i, j coliter_t it - char *word + const char *word = NULL char *data ndarray result @@ -1454,7 +1454,7 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start, coliter_setup(&it, parser, col, line_start) for i in range(line_end - line_start): - word = COLITER_NEXT(it) + 
COLITER_NEXT(it, word) strncpy(data, word, width) data += width @@ -1469,7 +1469,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end, int error, na_count = 0 size_t i, lines coliter_t it - char *word + const char *word = NULL char *p_end double *data double NA = na_values[np.float64] @@ -1485,7 +1485,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end, if na_filter: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) k = kh_get_str(na_hashset, word) # in the hash table @@ -1509,7 +1509,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end, data += 1 else: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) data[0] = parser.converter(word, &p_end, parser.decimal, parser.sci, parser.thousands, 1) if errno != 0 or p_end[0] or p_end == word: @@ -1530,7 +1530,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end, int error, na_count = 0 size_t i, lines coliter_t it - char *word + const char *word = NULL int64_t *data ndarray result @@ -1544,7 +1544,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end, if na_filter: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) k = kh_get_str(na_hashset, word) # in the hash table if k != na_hashset.n_buckets: @@ -1561,7 +1561,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end, return None, None else: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, &error, parser.thousands) if error != 0: @@ -1578,7 +1578,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end, int error, na_count = 0 size_t i, lines coliter_t it - char *word + const char *word = NULL uint8_t *data ndarray result @@ -1592,7 +1592,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end, if na_filter: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) k = kh_get_str(na_hashset, word) # in the hash table @@ -1608,7 +1608,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end, data += 1 else: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) error = to_boolean(word, data) if error != 0: @@ -1625,7 +1625,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end, int error, na_count = 0 size_t i, lines coliter_t it - char *word + const char *word = NULL uint8_t *data ndarray result @@ -1639,7 +1639,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end, if na_filter: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) k = kh_get_str(na_hashset, word) # in the hash table @@ -1667,7 +1667,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end, data += 1 else: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) k = kh_get_str(true_hashset, word) if k != true_hashset.n_buckets: @@ -1688,33 +1688,6 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end, return result.view(np.bool_), na_count -cdef _get_na_mask(parser_t *parser, int col, int line_start, int line_end, - kh_str_t *na_hashset): - cdef: - int error - Py_ssize_t i - size_t lines - coliter_t it - char *word - ndarray[uint8_t, cast=True] result - khiter_t k - - lines = line_end - line_start - result = np.empty(lines, dtype=np.bool_) - - coliter_setup(&it, parser, col, line_start) - for i in range(lines): - 
word = COLITER_NEXT(it) - - k = kh_get_str(na_hashset, word) - # in the hash table - if k != na_hashset.n_buckets: - result[i] = 1 - else: - result[i] = 0 - - return result - cdef kh_str_t* kset_from_list(list values) except NULL: # caller takes responsibility for freeing the hash table cdef: @@ -1897,7 +1870,7 @@ cdef _apply_converter(object f, parser_t *parser, int col, Py_ssize_t i size_t lines coliter_t it - char *word + const char *word = NULL char *errors = "strict" ndarray[object] result object val @@ -1909,17 +1882,17 @@ cdef _apply_converter(object f, parser_t *parser, int col, if not PY3 and c_encoding == NULL: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) val = PyBytes_FromString(word) result[i] = f(val) elif ((PY3 and c_encoding == NULL) or c_encoding == b'utf-8'): for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) val = PyUnicode_FromString(word) result[i] = f(val) else: for i in range(lines): - word = COLITER_NEXT(it) + COLITER_NEXT(it, word) val = PyUnicode_Decode(word, strlen(word), c_encoding, errors) result[i] = f(val) diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index 1bc4096658b29..1850aab50b55a 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -38,7 +38,7 @@ See LICENSE for the license * RESTORE_FINAL (2): * Put the file position at the next byte after the * data read from the file_buffer. -* +* #define RESTORE_NOT 0 #define RESTORE_INITIAL 1 #define RESTORE_FINAL 2 @@ -304,7 +304,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) { self->stream_len, &self->stream_cap, nbytes * 2, sizeof(char), &status); - TRACE(("make_stream_space: self->stream=%p, self->stream_len = %zu, self->stream_cap=%zu, status=%zu\n", + TRACE(("make_stream_space: self->stream=%p, self->stream_len = %zu, self->stream_cap=%zu, status=%zu\n", self->stream, self->stream_len, self->stream_cap, status)) if (status != 0) { @@ -334,7 +334,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) { self->words_len, &self->words_cap, nbytes, sizeof(char*), &status); - TRACE(("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, %d)\n", + TRACE(("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, %d)\n", self->words_len, self->words_cap, nbytes, status)) if (status != 0) { return PARSER_OUT_OF_MEMORY; @@ -371,7 +371,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) { self->lines + 1, &self->lines_cap, nbytes, sizeof(int), &status); - TRACE(("make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n", + TRACE(("make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n", self->lines + 1, self->lines_cap, nbytes, status)) if (status != 0) { return PARSER_OUT_OF_MEMORY; @@ -398,7 +398,7 @@ static int push_char(parser_t *self, char c) { /* TRACE(("pushing %c \n", c)) */ TRACE(("push_char: self->stream[%zu] = %x, stream_cap=%zu\n", self->stream_len+1, c, self->stream_cap)) if (self->stream_len >= self->stream_cap) { - TRACE(("push_char: ERROR!!! self->stream_len(%d) >= self->stream_cap(%d)\n", + TRACE(("push_char: ERROR!!! 
self->stream_len(%d) >= self->stream_cap(%d)\n", self->stream_len, self->stream_cap)) self->error_msg = (char*) malloc(64); sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n"); @@ -463,7 +463,6 @@ static void append_warning(parser_t *self, const char *msg) { static int end_line(parser_t *self) { int fields; - khiter_t k; /* for hash set detection */ int ex_fields = self->expected_fields; char *msg; @@ -483,7 +482,7 @@ static int end_line(parser_t *self) { TRACE(("end_line: Skipping row %d\n", self->file_lines)); // increment file line count self->file_lines++; - + // skip the tokens from this bad line self->line_start[self->lines] += fields; @@ -605,12 +604,11 @@ int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { static int parser_buffer_bytes(parser_t *self, size_t nbytes) { int status; size_t bytes_read; - void *src = self->source; status = 0; self->datapos = 0; self->data = self->cb_io(self->source, nbytes, &bytes_read, &status); - TRACE(("parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n", + TRACE(("parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n", nbytes, bytes_read, status)); self->datalen = bytes_read; @@ -704,7 +702,7 @@ typedef int (*parser_op)(parser_t *self, size_t line_limit); int skip_this_line(parser_t *self, int64_t rownum) { if (self->skipset != NULL) { - return ( kh_get_int64((kh_int64_t*) self->skipset, self->file_lines) != + return ( kh_get_int64((kh_int64_t*) self->skipset, self->file_lines) != ((kh_int64_t*)self->skipset)->n_buckets ); } else { @@ -784,7 +782,7 @@ int tokenize_delimited(parser_t *self, size_t line_limit) else self->state = EAT_CRNL; break; - } + } else if (c == self->commentchar) { self->state = EAT_LINE_COMMENT; break; @@ -1750,7 +1748,7 @@ int parser_trim_buffers(parser_t *self) { /* trim stream */ new_cap = _next_pow2(self->stream_len) + 1; - TRACE(("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = %zu\n", + TRACE(("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = %zu\n", new_cap, self->stream_cap, self->lines_cap)); if (new_cap < self->stream_cap) { TRACE(("parser_trim_buffers: new_cap < self->stream_cap, calling safe_realloc\n")); @@ -1871,7 +1869,7 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) { } } - TRACE(("_tokenize_helper: Trying to process %d bytes, datalen=%d, datapos= %d\n", + TRACE(("_tokenize_helper: Trying to process %d bytes, datalen=%d, datapos= %d\n", self->datalen - self->datapos, self->datalen, self->datapos)); /* TRACE(("sourcetype: %c, status: %d\n", self->sourcetype, status)); */ @@ -2033,7 +2031,7 @@ int P_INLINE to_longlong_thousands(char *item, long long *p_value, char tsep) return status; }*/ -int to_boolean(char *item, uint8_t *val) { +int to_boolean(const char *item, uint8_t *val) { char *tmp; int i, status = 0; @@ -2357,7 +2355,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, num_digits++; num_decimals++; } - + if (num_digits >= max_digits) // consume extra decimal digits while (isdigit(*p)) ++p; @@ -2653,4 +2651,4 @@ uint64_t str_to_uint64(const char *p_item, uint64_t uint_max, int *error) *error = 0; return number; } -*/ \ No newline at end of file +*/ diff --git a/pandas/src/parser/tokenizer.h b/pandas/src/parser/tokenizer.h index 694a73ec78153..d3777e858b6ca 100644 --- a/pandas/src/parser/tokenizer.h +++ b/pandas/src/parser/tokenizer.h @@ -228,9 +228,12 @@ coliter_t *coliter_new(parser_t *self, int i); /* #define COLITER_NEXT(iter) 
iter->words[iter->line_start[iter->line++] + iter->col] */ // #define COLITER_NEXT(iter) iter.words[iter.line_start[iter.line++] + iter.col] -#define COLITER_NEXT(iter) iter.words[*iter.line_start++ + iter.col] +#define COLITER_NEXT(iter, word) do { \ + const int i = *iter.line_start++ + iter.col; \ + word = i < *iter.line_start ? iter.words[i]: ""; \ + } while(0) -parser_t* parser_new(); +parser_t* parser_new(void); int parser_init(parser_t *self); @@ -270,6 +273,6 @@ double round_trip(const char *p, char **q, char decimal, char sci, char tsep, in //int P_INLINE to_complex(char *item, double *p_real, double *p_imag, char sci, char decimal); int P_INLINE to_longlong(char *item, long long *p_value); //int P_INLINE to_longlong_thousands(char *item, long long *p_value, char tsep); -int to_boolean(char *item, uint8_t *val); +int to_boolean(const char *item, uint8_t *val); #endif // _PARSER_COMMON_H_ From c2f61e0bb7100656559cd660d732d139e340ce66 Mon Sep 17 00:00:00 2001 From: David Stephens Date: Fri, 27 Mar 2015 19:18:10 -0700 Subject: [PATCH 044/239] BUG: raw_locales unreachable in util.testing.get_locales --- pandas/tests/test_util.py | 4 ++++ pandas/util/testing.py | 16 +++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 2e22b33dc769a..bb8bd3df96b71 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -79,6 +79,10 @@ def test_warning(self): with tm.assert_produces_warning(FutureWarning): self.assertNotAlmostEquals(1, 2) + def test_locale(self): + #GH9744 + locales = pandas.util.testing.get_locales() + self.assertTrue(len(locales) >= 1) def test_rands(): r = tm.rands(10) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3d9a0e7b43634..b4baedada46e1 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -331,19 +331,21 @@ def get_locales(prefix=None, normalize=True, # raw_locales is "\n" seperated list of locales # it may contain non-decodable parts, so split # extract what we can and then rejoin. - raw_locales = [] + raw_locales = raw_locales.split(b'\n') + out_locales = [] for x in raw_locales: - try: - raw_locales.append(str(x, encoding=pd.options.display.encoding)) - except: - pass + if compat.PY3: + out_locales.append(str(x, encoding=pd.options.display.encoding)) + else: + out_locales.append(str(x)) + except TypeError: pass if prefix is None: - return _valid_locales(raw_locales, normalize) + return _valid_locales(out_locales, normalize) - found = re.compile('%s.*' % prefix).findall('\n'.join(raw_locales)) + found = re.compile('%s.*' % prefix).findall('\n'.join(out_locales)) return _valid_locales(found, normalize) From ad3549d7fdb9cc2d5c39e258b5a0f89db277d2fe Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 11 Apr 2015 18:23:10 +0900 Subject: [PATCH 045/239] BUG/CLN: Repeated time-series plot may raise TypeError --- doc/source/whatsnew/v0.16.1.txt | 1 + pandas/tools/plotting.py | 107 ++++++++++---------------- pandas/tseries/tests/test_plotting.py | 32 ++++++++ 3 files changed, 75 insertions(+), 65 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 0062eb421890f..0374ade03290a 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -109,6 +109,7 @@ Bug Fixes - Bug in plotting continuously using ``secondary_y`` may not show legend properly. 
(:issue:`9610`, :issue:`9779`) - Bug in ``DataFrame.plot(kind="hist")`` results in ``TypeError`` when ``DataFrame`` contains non-numeric columns (:issue:`9853`) +- Bug where repeated plotting of ``DataFrame`` with a ``DatetimeIndex`` may raise ``TypeError`` (:issue:`9852`) - Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) - Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 1accc48b0d3c4..6a284e547433a 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -885,28 +885,16 @@ def _iter_data(self, data=None, keep_index=False, fillna=None): if fillna is not None: data = data.fillna(fillna) - from pandas.core.frame import DataFrame - if isinstance(data, (Series, np.ndarray, Index)): - label = self.label if self.label is not None else data.name + if self.sort_columns: + columns = com._try_sort(data.columns) + else: + columns = data.columns + + for col in columns: if keep_index is True: - yield label, data + yield col, data[col] else: - yield label, np.asarray(data) - elif isinstance(data, DataFrame): - if self.sort_columns: - columns = com._try_sort(data.columns) - else: - columns = data.columns - - for col in columns: - # # is this right? - # empty = df[col].count() == 0 - # values = df[col].values if not empty else np.zeros(len(df)) - - if keep_index is True: - yield col, data[col] - else: - yield col, data[col].values + yield col, data[col].values @property def nseries(self): @@ -1006,7 +994,15 @@ def result(self): return self.axes[0] def _compute_plot_data(self): - numeric_data = self.data.convert_objects()._get_numeric_data() + data = self.data + + if isinstance(data, Series): + label = self.kwds.pop('label', None) + if label is None and data.name is None: + label = 'None' + data = data.to_frame(name=label) + + numeric_data = data.convert_objects()._get_numeric_data() try: is_empty = numeric_data.empty @@ -1027,12 +1023,7 @@ def _add_table(self): if self.table is False: return elif self.table is True: - from pandas.core.frame import DataFrame - if isinstance(self.data, Series): - data = DataFrame(self.data, columns=[self.data.name]) - elif isinstance(self.data, DataFrame): - data = self.data - data = data.transpose() + data = self.data.transpose() else: data = self.table ax = self._get_ax(0) @@ -1099,18 +1090,15 @@ def _apply_axis_properties(self, axis, rot=None, fontsize=None): @property def legend_title(self): - if hasattr(self.data, 'columns'): - if not isinstance(self.data.columns, MultiIndex): - name = self.data.columns.name - if name is not None: - name = com.pprint_thing(name) - return name - else: - stringified = map(com.pprint_thing, - self.data.columns.names) - return ','.join(stringified) + if not isinstance(self.data.columns, MultiIndex): + name = self.data.columns.name + if name is not None: + name = com.pprint_thing(name) + return name else: - return None + stringified = map(com.pprint_thing, + self.data.columns.names) + return ','.join(stringified) def _add_legend_handle(self, handle, label, index=None): if not label is None: @@ -1256,12 +1244,10 @@ def _get_ax(self, i): return ax def on_right(self, i): - from pandas.core.frame import DataFrame if isinstance(self.secondary_y, bool): return self.secondary_y - if (isinstance(self.data, DataFrame) and - isinstance(self.secondary_y, (tuple, list, np.ndarray, Index))): + if isinstance(self.secondary_y, (tuple, list, np.ndarray, Index)): return 
self.data.columns[i] in self.secondary_y def _get_style(self, i, col_name): @@ -1553,16 +1539,14 @@ def __init__(self, data, **kwargs): self.x_compat = bool(self.kwds.pop('x_compat')) def _index_freq(self): - from pandas.core.frame import DataFrame - if isinstance(self.data, (Series, DataFrame)): - freq = getattr(self.data.index, 'freq', None) - if freq is None: - freq = getattr(self.data.index, 'inferred_freq', None) - if freq == 'B': - weekdays = np.unique(self.data.index.dayofweek) - if (5 in weekdays) or (6 in weekdays): - freq = None - return freq + freq = getattr(self.data.index, 'freq', None) + if freq is None: + freq = getattr(self.data.index, 'inferred_freq', None) + if freq == 'B': + weekdays = np.unique(self.data.index.dayofweek) + if (5 in weekdays) or (6 in weekdays): + freq = None + return freq def _is_dynamic_freq(self, freq): if isinstance(freq, DateOffset): @@ -1574,9 +1558,7 @@ def _is_dynamic_freq(self, freq): def _no_base(self, freq): # hack this for 0.10.1, creating more technical debt...sigh - from pandas.core.frame import DataFrame - if (isinstance(self.data, (Series, DataFrame)) - and isinstance(self.data.index, DatetimeIndex)): + if isinstance(self.data.index, DatetimeIndex): base = frequencies.get_freq(freq) x = self.data.index if (base <= frequencies.FreqGroup.FR_DAY): @@ -1686,17 +1668,13 @@ def _update_prior(self, y): def _maybe_convert_index(self, data): # tsplot converts automatically, but don't want to convert index # over and over for DataFrames - from pandas.core.frame import DataFrame - if (isinstance(data.index, DatetimeIndex) and - isinstance(data, DataFrame)): + if isinstance(data.index, DatetimeIndex): freq = getattr(data.index, 'freq', None) if freq is None: freq = getattr(data.index, 'inferred_freq', None) if isinstance(freq, DateOffset): freq = freq.rule_code - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) if freq is None: ax = self._get_ax(0) @@ -1705,9 +1683,10 @@ def _maybe_convert_index(self, data): if freq is None: raise ValueError('Could not get frequency alias for plotting') - data = DataFrame(data.values, - index=data.index.to_period(freq=freq), - columns=data.columns) + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + data.index = data.index.to_period(freq=freq) return data def _post_plot_logic(self): @@ -2522,9 +2501,7 @@ def plot_series(data, kind='line', ax=None, # Series unique if ax is None and len(plt.get_fignums()) > 0: ax = _gca() ax = getattr(ax, 'left_ax', ax) - # is there harm in this? 
- if label is None: - label = data.name + return _plot(data, kind=kind, ax=ax, figsize=figsize, use_index=use_index, title=title, grid=grid, legend=legend, diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index c4e642ffe43b0..bdc0aa02f2715 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -636,6 +636,38 @@ def test_mixed_freq_irregular_first(self): x2 = lines[1].get_xdata() assert_array_equal(x2, s1.index.asobject.values) + def test_mixed_freq_regular_first_df(self): + # GH 9852 + import matplotlib.pyplot as plt + s1 = tm.makeTimeSeries().to_frame() + s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] + ax = s1.plot() + ax2 = s2.plot(style='g', ax=ax) + lines = ax2.get_lines() + idx1 = PeriodIndex(lines[0].get_xdata()) + idx2 = PeriodIndex(lines[1].get_xdata()) + self.assertTrue(idx1.equals(s1.index.to_period('B'))) + self.assertTrue(idx2.equals(s2.index.to_period('B'))) + left, right = ax2.get_xlim() + pidx = s1.index.to_period() + self.assertEqual(left, pidx[0].ordinal) + self.assertEqual(right, pidx[-1].ordinal) + + @slow + def test_mixed_freq_irregular_first_df(self): + # GH 9852 + import matplotlib.pyplot as plt + s1 = tm.makeTimeSeries().to_frame() + s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] + ax = s2.plot(style='g') + ax = s1.plot(ax=ax) + self.assertFalse(hasattr(ax, 'freq')) + lines = ax.get_lines() + x1 = lines[0].get_xdata() + assert_array_equal(x1, s2.index.asobject.values) + x2 = lines[1].get_xdata() + assert_array_equal(x2, s1.index.asobject.values) + def test_mixed_freq_hf_first(self): idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') From 52875a1019ab3c668970dbc5142f507fbeb4c44b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Apr 2015 10:01:31 -0400 Subject: [PATCH 046/239] DOC: add note about hosted dev docs --- doc/source/contributing.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 2112d5b127e64..7785f5fe3283d 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -381,6 +381,13 @@ browser to see the full documentation you just built:: And you'll have the satisfaction of seeing your new and improved documentation! +.. _contributing.dev_docs: + +Built Master Branch Documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When pull-requests are merged into the pandas *master* branch, the main parts of the documentation are +also built by Travis-CI. These docs are then hosted `here `_. Contributing to the code base ============================= From 9351ee6df9f14aeae802625c6d7e864649e26896 Mon Sep 17 00:00:00 2001 From: Mortada Mehyar Date: Sun, 12 Apr 2015 11:53:38 -0700 Subject: [PATCH 047/239] DOC/CLN: fixed several typos in categorical.rst --- doc/source/categorical.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index d03e0fb117c5c..11e7fb0fd4117 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -23,11 +23,11 @@ Categorical Data .. versionadded:: 0.15 .. note:: - While there was in `pandas.Categorical` in earlier versions, the ability to use + While there was `pandas.Categorical` in earlier versions, the ability to use categorical data in `Series` and `DataFrame` is new. 
-This is a introduction to pandas categorical data type, including a short comparison +This is an introduction to pandas categorical data type, including a short comparison with R's ``factor``. `Categoricals` are a pandas data type, which correspond to categorical variables in @@ -276,7 +276,7 @@ Sorting and Order .. warning:: - The default for construction has change in v0.16.0 to ``ordered=False``, from the prior implicit ``ordered=True`` + The default for construction has changed in v0.16.0 to ``ordered=False``, from the prior implicit ``ordered=True`` If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a `TypeError`. @@ -347,15 +347,15 @@ Multi Column Sorting ~~~~~~~~~~~~~~~~~~~~ A categorical dtyped column will partcipate in a multi-column sort in a similar manner to other columns. -The ordering of the categorical is determined by the ``categories`` of that columns. +The ordering of the categorical is determined by the ``categories`` of that column. .. ipython:: python - dfs = DataFrame({'A' : Categorical(list('bbeebbaa'),categories=['e','a','b'],ordered=True), + dfs = DataFrame({'A' : Categorical(list('bbeebbaa'), categories=['e','a','b'], ordered=True), 'B' : [1,2,1,2,2,1,2,1] }) - dfs.sort(['A','B']) + dfs.sort(['A', 'B']) -Reordering the ``categories``, changes a future sort. +Reordering the ``categories`` changes a future sort. .. ipython:: python @@ -380,7 +380,7 @@ categories or a categorical with any list-like object, will raise a TypeError. Any "non-equality" comparisons of categorical data with a `Series`, `np.array`, `list` or categorical data with different categories or ordering will raise an `TypeError` because custom - categories ordering could be interpreted in two ways: one with taking in account the + categories ordering could be interpreted in two ways: one with taking into account the ordering and one without. .. ipython:: python @@ -471,7 +471,7 @@ Data munging ------------ The optimized pandas data access methods ``.loc``, ``.iloc``, ``.ix`` ``.at``, and ``.iat``, -work as normal, the only difference is the return type (for getting) and +work as normal. The only difference is the return type (for getting) and that only values already in `categories` can be assigned. Getting @@ -707,8 +707,8 @@ an ``object`` dtype is a constant times the length of the data. .. note:: - If the number of categories approaches the length of the data, the ``Categorical`` will use nearly (or more) memory than an - equivalent ``object`` dtype representation. + If the number of categories approaches the length of the data, the ``Categorical`` will use nearly the same or + more memory than an equivalent ``object`` dtype representation. .. 
ipython:: python

From 3582b1c3765c7eb931a74df51e34dbbf8a60b06e Mon Sep 17 00:00:00 2001
From: Mortada Mehyar
Date: Sun, 12 Apr 2015 19:41:10 -0700
Subject: [PATCH 048/239] DOC/CLN: fixed boolean indexing example, cleaned up typos

---
 doc/source/indexing.rst | 19 ++++++++++---------
 doc/source/options.rst  |  6 +++---
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index fc074802353ee..2eabc35fd831d 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -30,9 +30,9 @@ The axis labeling information in pandas objects serves many purposes:
 In this section, we will focus on the final point: namely, how to slice, dice,
 and generally get and set subsets of pandas objects. The primary focus will be
 on Series and DataFrame as they have received more development attention in
-this area. Expect more work to be invested higher-dimensional data structures
-(including ``Panel``) in the future, especially in label-based advanced
-indexing.
+this area. Expect more work to be invested in higher-dimensional data
+structures (including ``Panel``) in the future, especially in label-based
+advanced indexing.

 .. note::

@@ -54,7 +54,7 @@ indexing.

 .. warning::

-   In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray``
+   In 0.15.0 ``Index`` has internally been refactored to no longer subclass ``ndarray``
    but instead subclass ``PandasObject``, similarly to the rest of the pandas
    objects. This should be a transparent change with only very limited API implications
    (See the :ref:`Internal Refactoring <whatsnew_0150.refactoring>`)

@@ -225,9 +225,9 @@ new column.

    sa.a = 5
    sa

-   dfa.A = list(range(len(dfa.index))) # ok if A already exists
+   dfa.A = list(range(len(dfa.index)))  # ok if A already exists
    dfa
-   dfa['A'] = list(range(len(dfa.index))) # use this form to create a new column
+   dfa['A'] = list(range(len(dfa.index)))  # use this form to create a new column
    dfa

 .. warning::

@@ -314,7 +314,7 @@ Selection By Label

    dfl.loc['20130102':'20130104']

 pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
-**at least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**.
+**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**.

 The ``.loc`` attribute is the primary access method. The following are valid inputs:

@@ -578,9 +578,10 @@ Using a boolean vector to index a Series works exactly as in a numpy ndarray:

 .. ipython:: python

+   s = Series(range(-3, 4))
+   s
    s[s > 0]
-   s[(s < 0) & (s > -0.5)]
-   s[(s < -1) | (s > 1 )]
+   s[(s < -1) | (s > 0.5)]
    s[~(s < 0)]

 You may select rows from a DataFrame using a boolean vector the same length as

diff --git a/doc/source/options.rst b/doc/source/options.rst
index 7e36f369bc7e7..4b69015353612 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -18,7 +18,7 @@ Overview

 pandas has an options system that lets you customize some aspects of its behaviour,
 display-related options being those the user is most likely to adjust.

-Options have a full "dotted-style", case-insensitive name (e.g.
``display.max_rows``), +Options have a full "dotted-style", case-insensitive name (e.g. ``display.max_rows``). You can get/set options directly as attributes of the top-level ``options`` attribute: .. ipython:: python @@ -29,7 +29,7 @@ You can get/set options directly as attributes of the top-level ``options`` attr pd.options.display.max_rows There is also an API composed of 5 relevant functions, available directly from the ``pandas`` -namespace, and they are: +namespace: - :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. - :func:`~pandas.reset_option` - reset one or more options to their default value. @@ -412,7 +412,7 @@ mode.use_inf_as_null False True means treat None, NaN, -INF, Number Formatting ------------------ -pandas also allow you to set how numbers are displayed in the console. +pandas also allows you to set how numbers are displayed in the console. This option is not set through the ``set_options`` API. Use the ``set_eng_float_format`` function From db8b1a0d38601c4c55a93433af1caa1aad8b1294 Mon Sep 17 00:00:00 2001 From: Artemy Kolchinsky Date: Wed, 15 Oct 2014 14:39:51 -0400 Subject: [PATCH 049/239] ENH: Allow get_dummies to return sparse dataframe ENH: Allow get_dummies to return sparse dataframe ENH: Allow get_dummies to return sparse dataframe Fix Fix Fixes Bug in order of columns Slight speed improvement get_dummies update Release notes update Remove convert dummies test --- doc/source/whatsnew/v0.16.1.txt | 1 + pandas/core/reshape.py | 61 ++++++++++++++++++++++++--------- pandas/tests/test_reshape.py | 58 +++++++++++++++++-------------- 3 files changed, 79 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 0374ade03290a..ceea29c92c9c0 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -48,6 +48,7 @@ Enhancements df.drop(['A', 'X'], axis=1, errors='ignore') - Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`) +- ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return DataFrame is sparse. (:issue:`8823`) .. _whatsnew_0161.api: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 291a73778197a..af98e533cb5b7 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -9,6 +9,10 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame +from pandas.core.sparse import SparseDataFrame, SparseSeries +from pandas.sparse.array import SparseArray +from pandas._sparse import IntIndex + from pandas.core.categorical import Categorical from pandas.core.common import (notnull, _ensure_platform_int, _maybe_promote, isnull) @@ -932,7 +936,7 @@ def melt_stub(df, stub, i, j): return newdf.set_index([i, j]) def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, - columns=None): + columns=None, sparse=False): """ Convert categorical variable into dummy/indicator variables @@ -953,6 +957,8 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, Column names in the DataFrame to be encoded. If `columns` is None then all the columns with `object` or `category` dtype will be converted. + sparse : bool, default False + Whether the returned DataFrame should be sparse or not. 
Returns ------- @@ -1039,16 +1045,17 @@ def check_len(item, name): with_dummies = [result] for (col, pre, sep) in zip(columns_to_encode, prefix, prefix_sep): - dummy = _get_dummies_1d(data[col], prefix=pre, - prefix_sep=sep, dummy_na=dummy_na) + dummy = _get_dummies_1d(data[col], prefix=pre, prefix_sep=sep, + dummy_na=dummy_na, sparse=sparse) with_dummies.append(dummy) result = concat(with_dummies, axis=1) else: - result = _get_dummies_1d(data, prefix, prefix_sep, dummy_na) + result = _get_dummies_1d(data, prefix, prefix_sep, dummy_na, + sparse=sparse) return result -def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False): +def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False, sparse=False): # Series avoids inconsistent NaN handling cat = Categorical.from_array(Series(data), ordered=True) levels = cat.categories @@ -1059,19 +1066,17 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False): index = data.index else: index = np.arange(len(data)) - return DataFrame(index=index) - - number_of_cols = len(levels) - if dummy_na: - number_of_cols += 1 - - dummy_mat = np.eye(number_of_cols).take(cat.codes, axis=0) + if not sparse: + return DataFrame(index=index) + else: + return SparseDataFrame(index=index) + codes = cat.codes.copy() if dummy_na: + codes[codes == -1] = len(cat.categories) levels = np.append(cat.categories, np.nan) - else: - # reset NaN GH4446 - dummy_mat[cat.codes == -1] = 0 + + number_of_cols = len(levels) if prefix is not None: dummy_cols = ['%s%s%s' % (prefix, prefix_sep, v) @@ -1084,7 +1089,31 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False): else: index = None - return DataFrame(dummy_mat, index=index, columns=dummy_cols) + if sparse: + sparse_series = {} + N = len(data) + sp_indices = [ [] for _ in range(len(dummy_cols)) ] + for ndx, code in enumerate(codes): + if code == -1: + # Blank entries if not dummy_na and code == -1, #GH4446 + continue + sp_indices[code].append(ndx) + + for col, ixs in zip(dummy_cols, sp_indices): + sarr = SparseArray(np.ones(len(ixs)), sparse_index=IntIndex(N, ixs), + fill_value=0) + sparse_series[col] = SparseSeries(data=sarr, index=index) + + return SparseDataFrame(sparse_series, index=index, columns=dummy_cols) + + else: + dummy_mat = np.eye(number_of_cols).take(codes, axis=0) + + if not dummy_na: + # reset NaN GH4446 + dummy_mat[codes == -1] = 0 + + return DataFrame(dummy_mat, index=index, columns=dummy_cols) def make_axis_dummies(frame, axis='minor', transform=None): diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 66f5110830c72..346c9e2598985 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -151,6 +151,8 @@ def test_multiindex(self): class TestGetDummies(tm.TestCase): + sparse = False + def setUp(self): self.df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) @@ -163,20 +165,20 @@ def test_basic(self): expected = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0}, 'b': {0: 0.0, 1: 1.0, 2: 0.0}, 'c': {0: 0.0, 1: 0.0, 2: 1.0}}) - assert_frame_equal(get_dummies(s_list), expected) - assert_frame_equal(get_dummies(s_series), expected) + assert_frame_equal(get_dummies(s_list, sparse=self.sparse), expected) + assert_frame_equal(get_dummies(s_series, sparse=self.sparse), expected) expected.index = list('ABC') - assert_frame_equal(get_dummies(s_series_index), expected) + assert_frame_equal(get_dummies(s_series_index, sparse=self.sparse), expected) def test_just_na(self): just_na_list = [np.nan] just_na_series = 
Series(just_na_list) just_na_series_index = Series(just_na_list, index = ['A']) - res_list = get_dummies(just_na_list) - res_series = get_dummies(just_na_series) - res_series_index = get_dummies(just_na_series_index) + res_list = get_dummies(just_na_list, sparse=self.sparse) + res_series = get_dummies(just_na_series, sparse=self.sparse) + res_series_index = get_dummies(just_na_series_index, sparse=self.sparse) self.assertEqual(res_list.empty, True) self.assertEqual(res_series.empty, True) @@ -188,12 +190,13 @@ def test_just_na(self): def test_include_na(self): s = ['a', 'b', np.nan] - res = get_dummies(s) + res = get_dummies(s, sparse=self.sparse) exp = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0}, 'b': {0: 0.0, 1: 1.0, 2: 0.0}}) assert_frame_equal(res, exp) - res_na = get_dummies(s, dummy_na=True) + # Sparse dataframes do not allow nan labelled columns, see #GH8822 + res_na = get_dummies(s, dummy_na=True, sparse=self.sparse) exp_na = DataFrame({nan: {0: 0.0, 1: 0.0, 2: 1.0}, 'a': {0: 1.0, 1: 0.0, 2: 0.0}, 'b': {0: 0.0, 1: 1.0, 2: 0.0}}).reindex_axis(['a', 'b', nan], 1) @@ -201,7 +204,7 @@ def test_include_na(self): exp_na.columns = res_na.columns assert_frame_equal(res_na, exp_na) - res_just_na = get_dummies([nan], dummy_na=True) + res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse) exp_just_na = DataFrame(Series(1.0,index=[0]),columns=[nan]) assert_array_equal(res_just_na.values, exp_just_na.values) @@ -210,21 +213,21 @@ def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values e = 'e' eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE') s = [e, eacute, eacute] - res = get_dummies(s, prefix='letter') + res = get_dummies(s, prefix='letter', sparse=self.sparse) exp = DataFrame({'letter_e': {0: 1.0, 1: 0.0, 2: 0.0}, u('letter_%s') % eacute: {0: 0.0, 1: 1.0, 2: 1.0}}) assert_frame_equal(res, exp) def test_dataframe_dummies_all_obj(self): df = self.df[['A', 'B']] - result = get_dummies(df) + result = get_dummies(df, sparse=self.sparse) expected = DataFrame({'A_a': [1., 0, 1], 'A_b': [0., 1, 0], 'B_b': [1., 1, 0], 'B_c': [0., 0, 1]}) assert_frame_equal(result, expected) def test_dataframe_dummies_mix_default(self): df = self.df - result = get_dummies(df) + result = get_dummies(df, sparse=self.sparse) expected = DataFrame({'C': [1, 2, 3], 'A_a': [1., 0, 1], 'A_b': [0., 1, 0], 'B_b': [1., 1, 0], 'B_c': [0., 0, 1]}) @@ -235,7 +238,7 @@ def test_dataframe_dummies_prefix_list(self): prefixes = ['from_A', 'from_B'] df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) - result = get_dummies(df, prefix=prefixes) + result = get_dummies(df, prefix=prefixes, sparse=self.sparse) expected = DataFrame({'C': [1, 2, 3], 'from_A_a': [1., 0, 1], 'from_A_b': [0., 1, 0], 'from_B_b': [1., 1, 0], 'from_B_c': [0., 0, 1]}) @@ -243,10 +246,10 @@ def test_dataframe_dummies_prefix_list(self): 'from_B_c']] assert_frame_equal(result, expected) - def test_datafrmae_dummies_prefix_str(self): + def test_dataframe_dummies_prefix_str(self): # not that you should do this... 
df = self.df
-        result = get_dummies(df, prefix='bad')
+        result = get_dummies(df, prefix='bad', sparse=self.sparse)
         expected = DataFrame([[1, 1., 0., 1., 0.],
                               [2, 0., 1., 1., 0.],
                               [3, 1., 0., 0., 1.]],
@@ -256,40 +259,40 @@ def test_dataframe_dummies_subset(self):
     def test_dataframe_dummies_subset(self):
         df = self.df
         result = get_dummies(df, prefix=['from_A'],
-                             columns=['A'])
+                             columns=['A'], sparse=self.sparse)
         expected = DataFrame({'from_A_a': [1., 0, 1], 'from_A_b': [0., 1, 0],
                               'B': ['b', 'b', 'c'], 'C': [1, 2, 3]})
         assert_frame_equal(result, expected)

     def test_dataframe_dummies_prefix_sep(self):
         df = self.df
-        result = get_dummies(df, prefix_sep='..')
+        result = get_dummies(df, prefix_sep='..', sparse=self.sparse)
         expected = DataFrame({'C': [1, 2, 3], 'A..a': [1., 0, 1],
                               'A..b': [0., 1, 0], 'B..b': [1., 1, 0],
                               'B..c': [0., 0, 1]})
         expected = expected[['C', 'A..a', 'A..b', 'B..b', 'B..c']]
         assert_frame_equal(result, expected)

-        result = get_dummies(df, prefix_sep=['..', '__'])
+        result = get_dummies(df, prefix_sep=['..', '__'], sparse=self.sparse)
         expected = expected.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'})
         assert_frame_equal(result, expected)

-        result = get_dummies(df, prefix_sep={'A': '..', 'B': '__'})
+        result = get_dummies(df, prefix_sep={'A': '..', 'B': '__'}, sparse=self.sparse)
         assert_frame_equal(result, expected)

     def test_dataframe_dummies_prefix_bad_length(self):
         with tm.assertRaises(ValueError):
-            get_dummies(self.df, prefix=['too few'])
+            get_dummies(self.df, prefix=['too few'], sparse=self.sparse)

     def test_dataframe_dummies_prefix_sep_bad_length(self):
         with tm.assertRaises(ValueError):
-            get_dummies(self.df, prefix_sep=['bad'])
+            get_dummies(self.df, prefix_sep=['bad'], sparse=self.sparse)

     def test_dataframe_dummies_prefix_dict(self):
         prefixes = {'A': 'from_A', 'B': 'from_B'}
         df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'],
                         'C': [1, 2, 3]})
-        result = get_dummies(df, prefix=prefixes)
+        result = get_dummies(df, prefix=prefixes, sparse=self.sparse)
         expected = DataFrame({'from_A_a': [1., 0, 1], 'from_A_b': [0., 1, 0],
                               'from_B_b': [1., 1, 0], 'from_B_c': [0., 0, 1],
                               'C': [1, 2, 3]})
@@ -298,7 +301,7 @@ def test_dataframe_dummies_with_na(self):
     def test_dataframe_dummies_with_na(self):
         df = self.df
         df.loc[3, :] = [np.nan, np.nan, np.nan]
-        result = get_dummies(df, dummy_na=True)
+        result = get_dummies(df, dummy_na=True, sparse=self.sparse)
         expected = DataFrame({'C': [1, 2, 3, np.nan], 'A_a': [1., 0, 1, 0],
                               'A_b': [0., 1, 0, 0], 'A_nan': [0., 0, 0, 1],
                               'B_b': [1., 1, 0, 0], 'B_c': [0., 0, 1, 0],
                               'B_nan': [0., 0, 0, 1]})
         expected = expected[['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c',
                              'B_nan']]
         assert_frame_equal(result, expected)

-        result = get_dummies(df, dummy_na=False)
+        result = get_dummies(df, dummy_na=False, sparse=self.sparse)
         expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
         assert_frame_equal(result, expected)

     def test_dataframe_dummies_with_categorical(self):
         df = self.df
         df['cat'] = pd.Categorical(['x', 'y', 'y'])
-        result = get_dummies(df)
+        result = get_dummies(df, sparse=self.sparse)
         expected = DataFrame({'C': [1, 2, 3], 'A_a': [1., 0, 1],
                               'A_b': [0., 1, 0], 'B_b': [1., 1, 0],
                               'B_c': [0., 0, 1], 'cat_x': [1., 0, 0],
                               'cat_y': [0., 1, 1]})
         expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c',
                              'cat_x', 'cat_y']]
         assert_frame_equal(result, expected)

+
+class TestGetDummiesSparse(TestGetDummies):
+    sparse = True
+
+
 class TestLreshape(tm.TestCase):

     def test_pairs(self):
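For reference, a minimal sketch of the new keyword in use (run against this
branch; the input values are illustrative only):

    import pandas as pd

    s = pd.Series(list('abca'))

    pd.get_dummies(s)               # dense DataFrame of 0/1 indicator columns
    pd.get_dummies(s, sparse=True)  # SparseDataFrame whose columns are
                                    # SparseSeries backed by IntIndex block
                                    # indices, storing only the 1 entries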
From 5ed4801b02887bfd66adb28634b34af250133966 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Mon, 13 Apr 2015 09:05:36 -0400
Subject: [PATCH 050/239] wip

---
 doc/source/whatsnew/v0.16.1.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index ceea29c92c9c0..3a23fb5cef729 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -48,7 +48,7 @@ Enhancements

       df.drop(['A', 'X'], axis=1, errors='ignore')

 - Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`)
-- ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return DataFrame is sparse. (:issue:`8823`)
+- ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return ``DataFrame`` is sparse, e.g. ``SparseDataFrame``. (:issue:`8823`)

 .. _whatsnew_0161.api:

From 7ecc49a6c6942f5e8fa1f7a39ae6a2fa0c8c3f41 Mon Sep 17 00:00:00 2001
From: Jan Schulz
Date: Sun, 12 Apr 2015 23:37:47 +0200
Subject: [PATCH 051/239] BUG: Fix for comparisons of categorical and a scalar not in categories, xref GH9836

Up to now, a comparison of categorical data and a scalar which is not in the categories would return `False` for all elements when it should raise a `TypeError`, which it now does. Also fix that `!=` comparisons would return `False` for all elements when the more logical choice would be `True`.
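A minimal sketch of both behaviours (expected results follow the new test
cases below; the scalar "d" is deliberately not among the categories):

    import pandas as pd

    cat = pd.Series(pd.Categorical(list("abc"), ordered=True))

    # unequal comparisons against a scalar outside the categories now
    # raise instead of silently returning all-False
    try:
        cat > "d"
    except TypeError:
        pass

    cat == "d"  # [False, False, False]
    cat != "d"  # [True, True, True]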
---
 doc/source/whatsnew/v0.16.1.txt  |  5 +++++
 pandas/core/categorical.py       |  9 ++++++++-
 pandas/tests/test_categorical.py | 27 +++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index 3a23fb5cef729..bd1aef2dea04e 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -143,4 +143,9 @@ Bug Fixes

>>>>>>> 7879205... Fix to allow sparse dataframes to have nan column labels
=======
 - Bug in unequal comparisons between a ``Series`` of dtype ``"category"`` and a scalar (e.g. ``Series(Categorical(list("abc"), categories=list("cba"), ordered=True)) > "b"``), which wouldn't use the order of the categories but the lexicographical order (:issue:`9848`)
<<<<<<< HEAD
>>>>>>> f0ac930... Fix: unequal comparisons of categorical and scalar
=======
+
+- Bug in unequal comparisons between categorical data and a scalar which was not in the categories (e.g. ``Series(Categorical(list("abc"), ordered=True)) > "d"``). This returned ``False`` for all elements, but now raises a ``TypeError``. Equality comparisons also now return ``False`` for ``==`` and ``True`` for ``!=``. (:issue:`9848`)
>>>>>>> 35b20d8... BUG: Fix for comparisons of categorical and a scalar not in categories, xref GH9836

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 991678a8e7d79..b79f2c9b4f6df 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -61,7 +61,14 @@ def f(self, other):
             i = self.categories.get_loc(other)
             return getattr(self._codes, op)(i)
         else:
-            return np.repeat(False, len(self))
+            if op == '__eq__':
+                return np.repeat(False, len(self))
+            elif op == '__ne__':
+                return np.repeat(True, len(self))
+            else:
+                msg = "Cannot compare a Categorical for op {op} with a scalar, " \
+                      "which is not a category."
+                raise TypeError(msg.format(op=op))
     else:

         # allow categorical vs object dtype array comparisons for equality

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 4c5678bf6633f..af48774492b11 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1087,6 +1087,20 @@ def test_reflected_comparison_with_scalars(self):
         self.assert_numpy_array_equal(cat > cat[0], [False, True, True])
         self.assert_numpy_array_equal(cat[0] < cat, [False, True, True])

+    def test_comparison_with_unknown_scalars(self):
+        # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057 and following
+        # comparisons with scalars not in categories should raise for unequal comps, but not for
+        # equal/not equal
+        cat = pd.Categorical([1, 2, 3], ordered=True)
+
+        self.assertRaises(TypeError, lambda: cat < 4)
+        self.assertRaises(TypeError, lambda: cat > 4)
+        self.assertRaises(TypeError, lambda: 4 < cat)
+        self.assertRaises(TypeError, lambda: 4 > cat)
+
+        self.assert_numpy_array_equal(cat == 4, [False, False, False])
+        self.assert_numpy_array_equal(cat != 4, [True, True, True])
+

 class TestCategoricalAsBlock(tm.TestCase):
     _multiprocess_can_split_ = True
@@ -2440,6 +2454,19 @@ def f():
             cat > "b"
         self.assertRaises(TypeError, f)

+        # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057 and following
+        # comparisons with scalars not in categories should raise for unequal comps, but not for
+        # equal/not equal
+        cat = Series(Categorical(list("abc"), ordered=True))
+
+        self.assertRaises(TypeError, lambda: cat < "d")
+        self.assertRaises(TypeError, lambda: cat > "d")
+        self.assertRaises(TypeError, lambda: "d" < cat)
+        self.assertRaises(TypeError, lambda: "d" > cat)
+
+        self.assert_series_equal(cat == "d", Series([False, False, False]))
+        self.assert_series_equal(cat != "d", Series([True, True, True]))
+
         # And test NaN handling...
         cat = Series(Categorical(["a","b","c", np.nan]))

From f893ac5e7668db59e8c19cc69c523da4daf3e7fb Mon Sep 17 00:00:00 2001
From: Chris Gilmer
Date: Mon, 13 Apr 2015 10:24:55 -0400
Subject: [PATCH 052/239] GH9570 allow timedelta string conversion without leading zero

---
 doc/source/whatsnew/v0.16.1.txt         | 2 ++
 pandas/tseries/tests/test_timedeltas.py | 7 +++++++
 pandas/tseries/timedeltas.py            | 7 +++----
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index bd1aef2dea04e..3643e8d939694 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -50,6 +50,8 @@ Enhancements

 - Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`)
 - ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return ``DataFrame`` is sparse, e.g. ``SparseDataFrame``. (:issue:`8823`)

+- Allow timedelta string conversion when the leading zero is missing from the time definition, i.e. `0:00:00` vs `00:00:00`. (:issue:`9570`)
+
 ..
_whatsnew_0161.api:

 API changes

diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
index b74a3a59d3bca..bc51e01ca9bdf 100644
--- a/pandas/tseries/tests/test_timedeltas.py
+++ b/pandas/tseries/tests/test_timedeltas.py
@@ -64,6 +64,13 @@ def test_construction(self):
         self.assertEqual(Timedelta(123072001000000).value, 123072001000000)
         self.assertTrue('1 days 10:11:12.001' in str(Timedelta(123072001000000)))

+        # string conversion with/without leading zero
+        # GH 9570
+        self.assertEqual(Timedelta('0:00:00'), timedelta(hours=0))
+        self.assertEqual(Timedelta('00:00:00'), timedelta(hours=0))
+        self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1))
+        self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1))
+
         # more strings
         # GH 8190
         self.assertEqual(Timedelta('1 h'), timedelta(hours=1))

diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py
index 91e75da1b551c..5b353058f0093 100644
--- a/pandas/tseries/timedeltas.py
+++ b/pandas/tseries/timedeltas.py
@@ -119,7 +119,7 @@ def _validate_timedelta_unit(arg):

 _short_search = re.compile(
     "^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)
 _full_search = re.compile(
     "^\s*(?P<neg>-?)\s*(?P<days>\d*\.?\d*)?\s*(days|d|day)?,?\s*\+?(?P