Skip to content

Commit

Permalink
Merge pull request #107 from cmatKhan/patch_csv_sniffer
Browse files Browse the repository at this point in the history
fixing the csv sniffer auto detection of delimiters
  • Loading branch information
ejiawustl authored Aug 30, 2024
2 parents 1bf08eb + 2a75a72 commit d6e036b
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions yeastdnnexplorer/interface/AbstractRecordsAndFilesAPI.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,13 @@ def _detect_delimiter(self, file_path: str, sample_size: int = 1024) -> str:
:rtype: str
:raises FileNotFoundError: If the file does not exist.
:raises gzip.BadGzipFile: If the file is not a valid gzip file.
:raises _csv.Error: If the CSV sniffer cannot determine the delimiter.
"""
try:
# by default, open() uses newline=None, which opens the file
# in universal newline mode and translates all newline characters
# ('\n', '\r', '\r\n') to '\n' when reading
file = (
gzip.open(file_path, "rt")
if file_path.endswith(".gz")
Expand All @@ -86,6 +90,15 @@ def _detect_delimiter(self, file_path: str, sample_size: int = 1024) -> str:
raise FileNotFoundError(f"File {file_path} not found.") from exc

sample = file.read(sample_size)

# A fixed-size read can stop in the middle of a row, and a truncated
# final row can cause errors in the csv sniffer, so attempt to find the
# last newline character in the sample
last_newline_index = sample.rfind("\n")
# if a newline character is found, trim the sample to the last newline
if last_newline_index != -1:
# Trim to the last complete line
sample = sample[:last_newline_index]

sniffer = csv.Sniffer()
dialect = sniffer.sniff(sample)
delimiter = dialect.delimiter
Expand Down

0 comments on commit d6e036b

Please sign in to comment.