-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
DOC, ENH: Support memory_map for Python engine #13381
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
import os | ||
import csv | ||
import codecs | ||
import mmap | ||
import zipfile | ||
from contextlib import contextmanager, closing | ||
|
||
|
@@ -276,7 +277,7 @@ def ZipFile(*args, **kwargs): | |
ZipFile = zipfile.ZipFile | ||
|
||
|
||
def _get_handle(path, mode, encoding=None, compression=None): | ||
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False): | ||
"""Gets file handle for given path and mode. | ||
""" | ||
if compression is not None: | ||
|
@@ -324,9 +325,55 @@ def _get_handle(path, mode, encoding=None, compression=None): | |
else: | ||
f = open(path, mode) | ||
|
||
if memory_map and hasattr(f, 'fileno'): | ||
try: | ||
f = MMapWrapper(f) | ||
except Exception: | ||
# we catch any errors that may have occurred | ||
# because that is consistent with the lower-level | ||
# functionality of the C engine (pd.read_csv), so | ||
# leave the file handler as is then | ||
pass | ||
|
||
return f | ||
|
||
|
||
class MMapWrapper(BaseIterator):
    """
    Wrapper for Python's mmap class so that it can be properly read in
    by Python's csv.reader class.

    Iterating over the wrapper yields the mapped file one line at a time.
    Any attribute not defined here is looked up on the underlying
    ``mmap.mmap`` object.

    Parameters
    ----------
    f : file object
        File object to be mapped onto memory. Must support the 'fileno'
        method or have an equivalent attribute

    """

    def __init__(self, f):
        # a length of 0 asks mmap to map the file at its current size;
        # the mapping is opened read-only
        self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup has failed. If the
        # 'mmap' attribute itself is missing (e.g. the instance was built
        # without running __init__), delegating to self.mmap would call
        # __getattr__ again and recurse without end, so fail cleanly.
        if name == 'mmap':
            raise AttributeError(name)
        return getattr(self.mmap, name)

    def __next__(self):
        # NOTE(review): a reviewer asked for tests covering this method;
        # the author replied that tests were added.
        newline = self.mmap.readline()

        # readline returns bytes, not str, in Python 3,
        # but Python's CSV reader expects str, so convert
        # the output to str before continuing
        if compat.PY3:
            newline = compat.bytes_to_str(newline)

        # mmap doesn't raise if reading past the allocated
        # data but instead returns an empty string, so raise
        # if that is returned
        if newline == '':
            raise StopIteration
        return newline
|
||
|
||
class UTF8Recoder(BaseIterator): | ||
|
||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
a,b,c | ||
1,one,I | ||
2,two,II | ||
|
||
3,three,III |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -285,10 +285,6 @@ def test_usecols_dtypes(self): | |
self.assertTrue((result.dtypes == [object, np.int, np.float]).all()) | ||
self.assertTrue((result2.dtypes == [object, np.float]).all()) | ||
|
||
def test_memory_map(self):
    """Smoke test: reading ``self.csv1`` with ``memory_map=True`` must not raise."""
    # The parsed result is not inspected; the call completing is the check.
    source = self.csv1
    self.read_csv(source, memory_map=True)
|
||
def test_disable_bool_parsing(self): | ||
# #2090 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are you actually changing machinery for the C engine? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No change for the C engine. Just added stuff for Python. |
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
a,b,c | ||
1,one,I | ||
2,two,II | ||
3,three,III |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is doc-string updated?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes? I just added it.