@@ -21,16 +21,14 @@ from cython import Py_ssize_t
2121from cpython cimport (PyObject, PyBytes_FromString,
2222 PyBytes_AsString,
2323 PyUnicode_AsUTF8String,
24- PyErr_Occurred, PyErr_Fetch)
24+ PyErr_Occurred, PyErr_Fetch,
25+ PyUnicode_Decode)
2526from cpython.ref cimport Py_XDECREF
2627
2728
2829cdef extern from " Python.h" :
2930 object PyUnicode_FromString(char * v)
3031
31- object PyUnicode_Decode(char * v, Py_ssize_t size, char * encoding,
32- char * errors)
33-
3432
3533import numpy as np
3634cimport numpy as cnp
@@ -84,11 +82,6 @@ cdef extern from "headers/portable.h":
8482 # loudly.
8583 pass
8684
87- try :
88- basestring
89- except NameError :
90- basestring = str
91-
9285
9386cdef extern from " parser/tokenizer.h" :
9487
@@ -632,7 +625,7 @@ cdef class TextReader:
632625
633626 if self .compression:
634627 if self .compression == ' gzip' :
635- if isinstance (source, basestring ):
628+ if isinstance (source, str ):
636629 source = gzip.GzipFile(source, ' rb' )
637630 else :
638631 source = gzip.GzipFile(fileobj = source)
@@ -653,7 +646,7 @@ cdef class TextReader:
653646 raise ValueError (' Multiple files found in compressed '
654647 ' zip file %s ' , str (zip_names))
655648 elif self .compression == ' xz' :
656- if isinstance (source, basestring ):
649+ if isinstance (source, str ):
657650 source = lzma.LZMAFile(source, ' rb' )
658651 else :
659652 source = lzma.LZMAFile(filename = source)
@@ -671,11 +664,10 @@ cdef class TextReader:
671664
672665 self .handle = source
673666
674- if isinstance (source, basestring ):
675- if not isinstance (source, bytes):
676- encoding = sys.getfilesystemencoding() or " utf-8"
667+ if isinstance (source, str ):
668+ encoding = sys.getfilesystemencoding() or " utf-8"
677669
678- source = source.encode(encoding)
670+ source = source.encode(encoding)
679671
680672 if self .memory_map:
681673 ptr = new_mmap(source)
@@ -768,9 +760,7 @@ cdef class TextReader:
768760 for i in range (field_count):
769761 word = self .parser.words[start + i]
770762
771- if path == CSTRING:
772- name = PyBytes_FromString(word)
773- elif path == UTF8:
763+ if path == UTF8:
774764 name = PyUnicode_FromString(word)
775765 elif path == ENCODED:
776766 name = PyUnicode_Decode(word, strlen(word),
@@ -1309,9 +1299,6 @@ cdef class TextReader:
13091299 elif path == ENCODED:
13101300 return _string_box_decode(self .parser, i, start, end,
13111301 na_filter, na_hashset, self .c_encoding)
1312- elif path == CSTRING:
1313- return _string_box_factorize(self .parser, i, start, end,
1314- na_filter, na_hashset)
13151302
13161303 def _get_converter (self , i , name ):
13171304 if self .converters is None :
@@ -1389,7 +1376,7 @@ cdef:
13891376def _ensure_encoded (list lst ):
13901377 cdef list result = []
13911378 for x in lst:
1392- if isinstance (x, unicode ):
1379+ if isinstance (x, str ):
13931380 x = PyUnicode_AsUTF8String(x)
13941381 elif not isinstance (x, bytes):
13951382 x = str (x).encode(' utf-8' )
@@ -1421,7 +1408,6 @@ def _maybe_upcast(arr):
14211408
14221409
14231410cdef enum StringPath:
1424- CSTRING
14251411 UTF8
14261412 ENCODED
14271413
@@ -1663,10 +1649,6 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
16631649 for k in range (table.n_buckets):
16641650 if kh_exist_str(table, k):
16651651 result[table.vals[k]] = PyUnicode_FromString(table.keys[k])
1666- elif path == CSTRING:
1667- for k in range (table.n_buckets):
1668- if kh_exist_str(table, k):
1669- result[table.vals[k]] = PyBytes_FromString(table.keys[k])
16701652
16711653 kh_destroy_str(table)
16721654 return np.asarray(codes), result, na_count
0 commit comments