Skip to content

Commit dd81b32

Browse files
committed
BUG: Allow value labels to be read with iterator
Allow value labels to be read before the iterator has been used. Closes pandas-dev#16923
1 parent 6000c5b commit dd81b32

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

pandas/io/stata.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,7 @@ def __init__(self, path_or_buf, convert_dates=True,
997997
self.path_or_buf = BytesIO(contents)
998998

999999
self._read_header()
1000+
self._setup_dtype()
10001001

10011002
def __enter__(self):
10021003
""" enter context manager """
@@ -1299,6 +1300,20 @@ def _read_old_header(self, first_char):
12991300
# necessary data to continue parsing
13001301
self.data_location = self.path_or_buf.tell()
13011302

1303+
def _setup_dtype(self):
1304+
# Setup the dtype.
1305+
if self._dtype is None:
1306+
dtype = [] # Convert struct data types to numpy data type
1307+
for i, typ in enumerate(self.typlist):
1308+
if typ in self.NUMPY_TYPE_MAP:
1309+
dtype.append(('s' + str(i), self.byteorder +
1310+
self.NUMPY_TYPE_MAP[typ]))
1311+
else:
1312+
dtype.append(('s' + str(i), 'S' + str(typ)))
1313+
dtype = np.dtype(dtype)
1314+
self._dtype = dtype
1315+
return self._dtype
1316+
13021317
def _calcsize(self, fmt):
13031318
return (type(fmt) is int and fmt or
13041319
struct.calcsize(self.byteorder + fmt))
@@ -1408,6 +1423,7 @@ def _read_strls(self):
14081423
# Wrap v_o in a string to allow uint64 values as keys on 32bit OS
14091424
self.GSO[str(v_o)] = va
14101425

1426+
14111427
# legacy
14121428
@Appender(_data_method_doc)
14131429
def data(self, **kwargs):
@@ -1476,20 +1492,9 @@ def read(self, nrows=None, convert_dates=None,
14761492
self._can_read_value_labels = True
14771493
self._read_strls()
14781494

1479-
# Setup the dtype.
1480-
if self._dtype is None:
1481-
dtype = [] # Convert struct data types to numpy data type
1482-
for i, typ in enumerate(self.typlist):
1483-
if typ in self.NUMPY_TYPE_MAP:
1484-
dtype.append(('s' + str(i), self.byteorder +
1485-
self.NUMPY_TYPE_MAP[typ]))
1486-
else:
1487-
dtype.append(('s' + str(i), 'S' + str(typ)))
1488-
dtype = np.dtype(dtype)
1489-
self._dtype = dtype
14901495

14911496
# Read data
1492-
dtype = self._dtype
1497+
dtype = self._setup_dtype()
14931498
max_read_len = (self.nobs - self._lines_read) * dtype.itemsize
14941499
read_len = nrows * dtype.itemsize
14951500
read_len = min(read_len, max_read_len)

0 commit comments

Comments
 (0)