Skip to content

Commit

Permalink
BUG: read_csv: fix wrong exception on permissions issue (pandas-dev#3…
Browse files Browse the repository at this point in the history
…2737)

* Generate exception from the C code in the proper manner

Get rid of all error printf's and produce proper Python exceptions

* Declare some more exceptions from C code

* Remove special case error message for c parser

* Add whatsnew entry

* Fix missing semicolons

* Add regression test

* Remove special case handling for Windows

PyErr_SetFromErrnoWithFilename works for Unix and Windows

* Remove call to GetLastError(), when using 0, the python error code handles this

* black fixes

* Fix indentation of assert statement (also in previous test, same error)

* Skip the test on windows

* Fix black issue

* Let new_mmap fail without exception to allow fallback

* Do not create a python error in new_mmap to allow the fallback to work silently

* Remove the NULL pointer check for new_rd_source now that it will raise an exception

* Update doc/source/whatsnew/v1.1.0.rst

Co-Authored-By: gfyoung <gfyoung17+GitHub@gmail.com>

Co-authored-by: Jeff Reback <jeff@reback.net>
Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>
  • Loading branch information
3 people authored and jbrockmendel committed Mar 21, 2020
1 parent bc24a8c commit 628513e
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 27 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ I/O
- Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`)
- Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`)
- Bug in :meth:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`)
- Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`)


Plotting
Expand Down
18 changes: 2 additions & 16 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ cdef extern from "parser/io.h":
void* buffer_mmap_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status)

void *new_file_source(char *fname, size_t buffer_size)
void *new_file_source(char *fname, size_t buffer_size) except NULL

void *new_rd_source(object obj)
void *new_rd_source(object obj) except NULL

int del_file_source(void *src)
int del_rd_source(void *src)
Expand Down Expand Up @@ -667,26 +667,12 @@ cdef class TextReader:
ptr = new_file_source(source, self.parser.chunksize)
self.parser.cb_io = &buffer_file_bytes
self.parser.cb_cleanup = &del_file_source

if ptr == NULL:
if not os.path.exists(source):

raise FileNotFoundError(
ENOENT,
f'File {usource} does not exist',
usource)
raise IOError('Initializing from file failed')

self.parser.source = ptr

elif hasattr(source, 'read'):
# e.g., StringIO

ptr = new_rd_source(source)
if ptr == NULL:
raise IOError('Initializing parser from file-like '
'object failed')

self.parser.source = ptr
self.parser.cb_io = &buffer_rd_bytes
self.parser.cb_cleanup = &del_rd_source
Expand Down
19 changes: 11 additions & 8 deletions pandas/_libs/src/parser/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The full license is in the LICENSE file, distributed with this software.
void *new_file_source(char *fname, size_t buffer_size) {
file_source *fs = (file_source *)malloc(sizeof(file_source));
if (fs == NULL) {
PyErr_NoMemory();
return NULL;
}

Expand All @@ -41,17 +42,20 @@ void *new_file_source(char *fname, size_t buffer_size) {
int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
if (required == 0) {
free(fs);
PyErr_SetFromWindowsErr(0);
return NULL;
}
wname = (wchar_t*)malloc(required * sizeof(wchar_t));
if (wname == NULL) {
free(fs);
PyErr_NoMemory();
return NULL;
}
if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) <
required) {
free(wname);
free(fs);
PyErr_SetFromWindowsErr(0);
return NULL;
}
fs->fd = _wopen(wname, O_RDONLY | O_BINARY);
Expand All @@ -62,6 +66,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
#endif
if (fs->fd == -1) {
free(fs);
PyErr_SetFromErrnoWithFilename(PyExc_OSError, fname);
return NULL;
}

Expand All @@ -71,6 +76,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
if (fs->buffer == NULL) {
close(fs->fd);
free(fs);
PyErr_NoMemory();
return NULL;
}

Expand All @@ -83,6 +89,10 @@ void *new_file_source(char *fname, size_t buffer_size) {
void *new_rd_source(PyObject *obj) {
rd_source *rds = (rd_source *)malloc(sizeof(rd_source));

if (rds == NULL) {
PyErr_NoMemory();
return NULL;
}
/* hold on to this object */
Py_INCREF(obj);
rds->obj = obj;
Expand Down Expand Up @@ -220,20 +230,15 @@ void *new_mmap(char *fname) {

mm = (memory_map *)malloc(sizeof(memory_map));
if (mm == NULL) {
fprintf(stderr, "new_file_buffer: malloc() failed.\n");
return (NULL);
return NULL;
}
mm->fd = open(fname, O_RDONLY | O_BINARY);
if (mm->fd == -1) {
fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n",
fname, errno);
free(mm);
return NULL;
}

if (fstat(mm->fd, &stat) == -1) {
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n",
errno);
close(mm->fd);
free(mm);
return NULL;
Expand All @@ -242,8 +247,6 @@ void *new_mmap(char *fname) {

mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0);
if (mm->memmap == MAP_FAILED) {
/* XXX Eventually remove this print statement. */
fprintf(stderr, "new_file_buffer: mmap() failed.\n");
close(mm->fd);
free(mm);
return NULL;
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/io/parser/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,13 +960,23 @@ def test_nonexistent_path(all_parsers):
parser = all_parsers
path = f"{tm.rands(10)}.csv"

msg = f"File {path} does not exist" if parser.engine == "c" else r"\[Errno 2\]"
msg = r"\[Errno 2\]"
with pytest.raises(FileNotFoundError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename

filename = e.value.filename

assert path == filename
@td.skip_if_windows # os.chmod does not work in windows
def test_no_permission(all_parsers):
# GH 23784
parser = all_parsers

msg = r"\[Errno 13\]"
with tm.ensure_clean() as path:
os.chmod(path, 0) # make file unreadable
with pytest.raises(PermissionError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename


def test_missing_trailing_delimiters(all_parsers):
Expand Down

0 comments on commit 628513e

Please sign in to comment.