diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index fbde1727fbb27..2db61a17858de 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -348,6 +348,7 @@ I/O - Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`) - Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`) - Bug in :meth:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`) +- Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`) Plotting diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 4f7d75e0aaad6..39195585ebfa6 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -241,9 +241,9 @@ cdef extern from "parser/io.h": void* buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status) - void *new_file_source(char *fname, size_t buffer_size) + void *new_file_source(char *fname, size_t buffer_size) except NULL - void *new_rd_source(object obj) + void *new_rd_source(object obj) except NULL int del_file_source(void *src) int del_rd_source(void *src) @@ -667,26 +667,12 @@ cdef class TextReader: ptr = new_file_source(source, self.parser.chunksize) self.parser.cb_io = &buffer_file_bytes self.parser.cb_cleanup = &del_file_source - - if ptr == NULL: - if not os.path.exists(source): - - raise FileNotFoundError( - ENOENT, - f'File {usource} does not exist', - usource) - raise IOError('Initializing from file failed') - self.parser.source = ptr elif hasattr(source, 'read'): # e.g., StringIO ptr = new_rd_source(source) - if ptr == NULL: - raise IOError('Initializing parser from file-like ' - 'object failed') - self.parser.source = ptr self.parser.cb_io = &buffer_rd_bytes self.parser.cb_cleanup = &del_rd_source diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c index 1e3295fcb6fc7..51504527de5a2 100644 --- a/pandas/_libs/src/parser/io.c +++ b/pandas/_libs/src/parser/io.c @@ -28,6 +28,7 @@ The full license is in the LICENSE file, distributed with this software. void *new_file_source(char *fname, size_t buffer_size) { file_source *fs = (file_source *)malloc(sizeof(file_source)); if (fs == NULL) { + PyErr_NoMemory(); return NULL; } @@ -41,17 +42,20 @@ void *new_file_source(char *fname, size_t buffer_size) { int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0); if (required == 0) { free(fs); + PyErr_SetFromWindowsErr(0); return NULL; } wname = (wchar_t*)malloc(required * sizeof(wchar_t)); if (wname == NULL) { free(fs); + PyErr_NoMemory(); return NULL; } if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) < required) { free(wname); free(fs); + PyErr_SetFromWindowsErr(0); return NULL; } fs->fd = _wopen(wname, O_RDONLY | O_BINARY); @@ -62,6 +66,7 @@ void *new_file_source(char *fname, size_t buffer_size) { #endif if (fs->fd == -1) { free(fs); + PyErr_SetFromErrnoWithFilename(PyExc_OSError, fname); return NULL; } @@ -71,6 +76,7 @@ void *new_file_source(char *fname, size_t buffer_size) { if (fs->buffer == NULL) { close(fs->fd); free(fs); + PyErr_NoMemory(); return NULL; } @@ -83,6 +89,10 @@ void *new_file_source(char *fname, size_t buffer_size) { void *new_rd_source(PyObject *obj) { rd_source *rds = (rd_source *)malloc(sizeof(rd_source)); + if (rds == NULL) { + PyErr_NoMemory(); + return NULL; + } /* hold on to this object */ Py_INCREF(obj); rds->obj = obj; @@ -220,20 +230,15 @@ void *new_mmap(char *fname) { mm = (memory_map *)malloc(sizeof(memory_map)); if (mm == NULL) { - fprintf(stderr, "new_file_buffer: malloc() failed.\n"); - return (NULL); + return NULL; } mm->fd = open(fname, O_RDONLY | O_BINARY); if (mm->fd == -1) { - fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n", - fname, errno); free(mm); return NULL; } if (fstat(mm->fd, &stat) == -1) { - fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", - errno); close(mm->fd); free(mm); return NULL; @@ -242,8 +247,6 @@ void *new_mmap(char *fname) { mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0); if (mm->memmap == MAP_FAILED) { - /* XXX Eventually remove this print statement. */ - fprintf(stderr, "new_file_buffer: mmap() failed.\n"); close(mm->fd); free(mm); return NULL; diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 0f3a5be76ae60..9de2ec9799353 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -960,13 +960,23 @@ def test_nonexistent_path(all_parsers): parser = all_parsers path = f"{tm.rands(10)}.csv" - msg = f"File {path} does not exist" if parser.engine == "c" else r"\[Errno 2\]" + msg = r"\[Errno 2\]" with pytest.raises(FileNotFoundError, match=msg) as e: parser.read_csv(path) + assert path == e.value.filename - filename = e.value.filename - assert path == filename +@td.skip_if_windows # os.chmod does not work in windows +def test_no_permission(all_parsers): + # GH 23784 + parser = all_parsers + + msg = r"\[Errno 13\]" + with tm.ensure_clean() as path: + os.chmod(path, 0) # make file unreadable + with pytest.raises(PermissionError, match=msg) as e: + parser.read_csv(path) + assert path == e.value.filename def test_missing_trailing_delimiters(all_parsers):