rapidsai · rapids-bot · Nov 5, 2021 · Nov 5, 2021 · Nov 5, 2021
@@ -111,6 +111,7 @@ def _internal_read_csv(path, chunksize="256 MiB", **kwargs):
         return read_csv_without_chunksize(path, **kwargs)
 
     dask_reader = make_reader(cudf.read_csv, "read_csv", "CSV")
+    usecols = kwargs.pop("usecols", None)
     meta = dask_reader(filenames[0], **kwargs)._meta
 
     dsk = {}
@@ -130,11 +131,14 @@ def _internal_read_csv(path, chunksize="256 MiB", **kwargs):
                     "names"
                 ] = meta.columns  # no header in the middle of the file
                 kwargs2["header"] = None
+            kwargs2["usecols"] = usecols
             dsk[(name, i)] = (apply, _read_csv, [fn, dtypes], kwargs2)
 
             i += 1
 
     divisions = [None] * (len(dsk) + 1)
+    if usecols is not None:
+        meta = meta[usecols]
     return dd.core.new_dd_object(dsk, name, meta, divisions)
 
 

@@ -134,3 +134,19 @@ def test_read_csv_chunksize_none(tmp_path, compression, size):
     df.to_csv(path, index=False, compression=compression)
     df2 = dask_cudf.read_csv(path, chunksize=None, dtype=typ)
     dd.assert_eq(df, df2)
+
+
+def test_csv_reader_usecols(tmp_path):
+    df = cudf.DataFrame(
+        {
+            "a": [1, 2, 3, 4] * 100,
+            "b": ["a", "b", "c", "d"] * 100,
+            "c": [10, 11, 12, 13] * 100,
+        }
+    )
+    csv_path = str(tmp_path / "usecols_data.csv")
+    df.to_csv(csv_path, index=False)
+    ddf = dask_cudf.from_cudf(df[["b", "c"]], npartitions=5)
+    ddf2 = dask_cudf.read_csv(csv_path, usecols=["b", "c"])
+
+    dd.assert_eq(ddf, ddf2, check_divisions=False, check_index=False)