Skip to content

Commit babb386

Browse files
committed
TYP: make the type annotations of read_csv & read_table discoverable
1 parent 6a6faf5 commit babb386

File tree

2 files changed

+177
-102
lines changed

2 files changed

+177
-102
lines changed

pandas/io/parsers.py

+155-102
Original file line numberDiff line numberDiff line change
@@ -530,10 +530,16 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
530530
_deprecated_args: Set[str] = set()
531531

532532

533-
def _make_parser_function(name, default_sep=","):
534-
def parser_f(
533+
@Appender(
534+
_doc_read_csv_and_table.format(
535+
func_name="read_csv",
536+
summary="Read a comma-separated values (csv) file into DataFrame.",
537+
_default_sep="','",
538+
)
539+
)
540+
def read_csv(
535541
filepath_or_buffer: FilePathOrBuffer,
536-
sep=default_sep,
542+
sep=",",
537543
delimiter=None,
538544
# Column and Index Locations and Names
539545
header="infer",
@@ -589,117 +595,164 @@ def parser_f(
589595
low_memory=_c_parser_defaults["low_memory"],
590596
memory_map=False,
591597
float_precision=None,
592-
):
593-
594-
# gh-23761
595-
#
596-
# When a dialect is passed, it overrides any of the overlapping
597-
# parameters passed in directly. We don't want to warn if the
598-
# default parameters were passed in (since it probably means
599-
# that the user didn't pass them in explicitly in the first place).
600-
#
601-
# "delimiter" is the annoying corner case because we alias it to
602-
# "sep" before doing comparison to the dialect values later on.
603-
# Thus, we need a flag to indicate that we need to "override"
604-
# the comparison to dialect values by checking if default values
605-
# for BOTH "delimiter" and "sep" were provided.
606-
if dialect is not None:
607-
sep_override = delimiter is None and sep == default_sep
608-
kwds = dict(sep_override=sep_override)
609-
else:
610-
kwds = dict()
611-
612-
# Alias sep -> delimiter.
613-
if delimiter is None:
614-
delimiter = sep
615-
616-
if delim_whitespace and delimiter != default_sep:
617-
raise ValueError(
618-
"Specified a delimiter with both sep and "
619-
"delim_whitespace=True; you can only specify one."
620-
)
598+
):
599+
# gh-23761
600+
#
601+
# When a dialect is passed, it overrides any of the overlapping
602+
# parameters passed in directly. We don't want to warn if the
603+
# default parameters were passed in (since it probably means
604+
# that the user didn't pass them in explicitly in the first place).
605+
#
606+
# "delimiter" is the annoying corner case because we alias it to
607+
# "sep" before doing comparison to the dialect values later on.
608+
# Thus, we need a flag to indicate that we need to "override"
609+
# the comparison to dialect values by checking if default values
610+
# for BOTH "delimiter" and "sep" were provided.
611+
default_sep = ","
612+
613+
if dialect is not None:
614+
sep_override = delimiter is None and sep == default_sep
615+
kwds = dict(sep_override=sep_override)
616+
else:
617+
kwds = dict()
621618

622-
if engine is not None:
623-
engine_specified = True
624-
else:
625-
engine = "c"
626-
engine_specified = False
619+
# Alias sep -> delimiter.
620+
if delimiter is None:
621+
delimiter = sep
627622

628-
kwds.update(
629-
delimiter=delimiter,
630-
engine=engine,
631-
dialect=dialect,
632-
compression=compression,
633-
engine_specified=engine_specified,
634-
doublequote=doublequote,
635-
escapechar=escapechar,
636-
quotechar=quotechar,
637-
quoting=quoting,
638-
skipinitialspace=skipinitialspace,
639-
lineterminator=lineterminator,
640-
header=header,
641-
index_col=index_col,
642-
names=names,
643-
prefix=prefix,
644-
skiprows=skiprows,
645-
skipfooter=skipfooter,
646-
na_values=na_values,
647-
true_values=true_values,
648-
false_values=false_values,
649-
keep_default_na=keep_default_na,
650-
thousands=thousands,
651-
comment=comment,
652-
decimal=decimal,
653-
parse_dates=parse_dates,
654-
keep_date_col=keep_date_col,
655-
dayfirst=dayfirst,
656-
date_parser=date_parser,
657-
cache_dates=cache_dates,
658-
nrows=nrows,
659-
iterator=iterator,
660-
chunksize=chunksize,
661-
converters=converters,
662-
dtype=dtype,
663-
usecols=usecols,
664-
verbose=verbose,
665-
encoding=encoding,
666-
squeeze=squeeze,
667-
memory_map=memory_map,
668-
float_precision=float_precision,
669-
na_filter=na_filter,
670-
delim_whitespace=delim_whitespace,
671-
warn_bad_lines=warn_bad_lines,
672-
error_bad_lines=error_bad_lines,
673-
low_memory=low_memory,
674-
mangle_dupe_cols=mangle_dupe_cols,
675-
infer_datetime_format=infer_datetime_format,
676-
skip_blank_lines=skip_blank_lines,
623+
if delim_whitespace and delimiter != default_sep:
624+
raise ValueError(
625+
"Specified a delimiter with both sep and "
626+
"delim_whitespace=True; you can only specify one."
677627
)
678628

679-
return _read(filepath_or_buffer, kwds)
680-
681-
parser_f.__name__ = name
682-
683-
return parser_f
629+
if engine is not None:
630+
engine_specified = True
631+
else:
632+
engine = "c"
633+
engine_specified = False
634+
635+
kwds.update(
636+
delimiter=delimiter,
637+
engine=engine,
638+
dialect=dialect,
639+
compression=compression,
640+
engine_specified=engine_specified,
641+
doublequote=doublequote,
642+
escapechar=escapechar,
643+
quotechar=quotechar,
644+
quoting=quoting,
645+
skipinitialspace=skipinitialspace,
646+
lineterminator=lineterminator,
647+
header=header,
648+
index_col=index_col,
649+
names=names,
650+
prefix=prefix,
651+
skiprows=skiprows,
652+
skipfooter=skipfooter,
653+
na_values=na_values,
654+
true_values=true_values,
655+
false_values=false_values,
656+
keep_default_na=keep_default_na,
657+
thousands=thousands,
658+
comment=comment,
659+
decimal=decimal,
660+
parse_dates=parse_dates,
661+
keep_date_col=keep_date_col,
662+
dayfirst=dayfirst,
663+
date_parser=date_parser,
664+
cache_dates=cache_dates,
665+
nrows=nrows,
666+
iterator=iterator,
667+
chunksize=chunksize,
668+
converters=converters,
669+
dtype=dtype,
670+
usecols=usecols,
671+
verbose=verbose,
672+
encoding=encoding,
673+
squeeze=squeeze,
674+
memory_map=memory_map,
675+
float_precision=float_precision,
676+
na_filter=na_filter,
677+
delim_whitespace=delim_whitespace,
678+
warn_bad_lines=warn_bad_lines,
679+
error_bad_lines=error_bad_lines,
680+
low_memory=low_memory,
681+
mangle_dupe_cols=mangle_dupe_cols,
682+
infer_datetime_format=infer_datetime_format,
683+
skip_blank_lines=skip_blank_lines,
684+
)
684685

686+
return _read(filepath_or_buffer, kwds)
685687

686-
read_csv = _make_parser_function("read_csv", default_sep=",")
687-
read_csv = Appender(
688-
_doc_read_csv_and_table.format(
689-
func_name="read_csv",
690-
summary="Read a comma-separated values (csv) file into DataFrame.",
691-
_default_sep="','",
692-
)
693-
)(read_csv)
694688

695-
read_table = _make_parser_function("read_table", default_sep="\t")
696-
read_table = Appender(
689+
@Appender(
    _doc_read_csv_and_table.format(
        func_name="read_table",
        summary="Read general delimited file into DataFrame.",
        _default_sep=r"'\\t' (tab-stop)",
    )
)
def read_table(
    filepath_or_buffer: FilePathOrBuffer,
    sep="\t",
    delimiter=None,
    # Column and Index Locations and Names
    header="infer",
    names=None,
    index_col=None,
    usecols=None,
    squeeze=False,
    prefix=None,
    mangle_dupe_cols=True,
    # General Parsing Configuration
    dtype=None,
    engine=None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace=False,
    skiprows=None,
    skipfooter=0,
    nrows=None,
    # NA and Missing Data Handling
    na_values=None,
    keep_default_na=True,
    na_filter=True,
    verbose=False,
    skip_blank_lines=True,
    # Datetime Handling
    parse_dates=False,
    infer_datetime_format=False,
    keep_date_col=False,
    date_parser=None,
    dayfirst=False,
    cache_dates=True,
    # Iteration
    iterator=False,
    chunksize=None,
    # Quoting, Compression, and File Format
    compression="infer",
    thousands=None,
    decimal: str = ".",
    lineterminator=None,
    quotechar='"',
    quoting=csv.QUOTE_MINIMAL,
    doublequote=True,
    escapechar=None,
    comment=None,
    encoding=None,
    dialect=None,
    # Error Handling
    error_bad_lines=True,
    warn_bad_lines=True,
    # Internal
    delim_whitespace=False,
    low_memory=_c_parser_defaults["low_memory"],
    memory_map=False,
    float_precision=None,
):
    # Thin wrapper: same parameters as read_csv, except that ``sep`` defaults
    # to a tab.  Every argument is forwarded to read_csv by keyword.
    #
    # Deliberately no docstring in the body: the documentation text is
    # supplied through the Appender decorator above.
    #
    # read_csv validates ``delim_whitespace`` against ITS OWN default
    # separator (","), so forwarding the tab default unchanged would make
    # ``read_table(path, delim_whitespace=True)`` raise ValueError.  Validate
    # here against the tab default instead, then pass read_csv the separator
    # it treats as "not specified".
    if delim_whitespace:
        if (sep if delimiter is None else delimiter) != "\t":
            raise ValueError(
                "Specified a delimiter with both sep and "
                "delim_whitespace=True; you can only specify one."
            )
        # Only existing parameters are rebound here -- see the locals() note
        # below.  "," together with delim_whitespace=True is the one
        # combination read_csv itself accepts.
        sep = ","
        delimiter = None

    # NOTE(review): read_csv's dialect handling compares ``sep`` with "," to
    # decide whether the separator was left at its default.  With the tab
    # default used here that comparison is False, so a dialect passed to
    # read_table may be reported as conflicting with ``sep`` -- confirm and
    # address alongside read_csv.

    # locals() must contain exactly the parameters at this point: binding any
    # additional local name above would be forwarded to read_csv as an
    # unexpected keyword argument.
    return read_csv(**locals())
703756

704757

705758
def read_fwf(

pandas/tests/io/parser/test_common.py

+22
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import codecs
66
import csv
77
from datetime import datetime
8+
from inspect import signature
89
from io import StringIO
910
import os
1011
import platform
@@ -2071,6 +2072,27 @@ def test_read_csv_raises_on_header_prefix(all_parsers):
20712072
parser.read_csv(s, header=0, prefix="_X")
20722073

20732074

2075+
def test_read_table_same_signature_as_read_csv(all_parsers):
    # GH-XXXXXX
    # read_table has to expose exactly the same signature as read_csv; the
    # default value of ``sep`` is the only difference that is tolerated.
    from pandas import read_csv, read_table

    csv_sig = signature(read_csv)
    table_sig = signature(read_table)

    # Same parameter names and the same return annotation.
    assert table_sig.parameters.keys() == csv_sig.parameters.keys()
    assert table_sig.return_annotation == csv_sig.return_annotation

    for name, csv_param in csv_sig.parameters.items():
        table_param = table_sig.parameters[name]

        if name != "sep":
            # Every other parameter must match in full
            # (name, kind, default and annotation).
            assert table_param == csv_param
            continue

        # ``sep``: the defaults differ, everything else must agree.
        assert csv_param.default == ","
        assert table_param.default == "\t"
        assert table_param.annotation == csv_param.annotation
        assert table_param.kind == csv_param.kind
2094+
2095+
20742096
def test_read_table_equivalency_to_read_csv(all_parsers):
20752097
# see gh-21948
20762098
# As of 0.25.0, read_table is undeprecated

0 commit comments

Comments
 (0)