Skip to content

Commit

Permalink
Add support for running detectors from command line
Browse files Browse the repository at this point in the history
  • Loading branch information
GjjvdBurg committed Aug 15, 2020
1 parent bb3787c commit 9b7d4e6
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 19 deletions.
13 changes: 5 additions & 8 deletions comparison/detector_clevercsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
"""

import clevercsv
import sys

from utils import DetectionError, get_sample
from utils import DetectionError, get_sample, parse_args


def detector(gz_filename, encoding, n_lines=None):
Expand All @@ -31,10 +30,8 @@ def detector(gz_filename, encoding, n_lines=None):
escapechar=dialect.escapechar,
)


if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
encoding = clevercsv.utils.get_encoding(filename)
print(detector(filename, encoding))
else:
print(f"Usage: {sys.argv[0]} filename", file=sys.stderr)
args = parse_args()
encoding = clevercsv.utils.get_encoding(args.filename)
print(detector(args.filename, encoding, n_lines=args.n))
12 changes: 4 additions & 8 deletions comparison/detector_clevercsv_grow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
"""

import clevercsv
import sys

from utils import DetectionError, get_sample, count_lines
from utils import DetectionError, get_sample, count_lines, parse_args


def trailing_equality(l, k=4):
Expand Down Expand Up @@ -73,9 +72,6 @@ def detector(filename, encoding, n_lines=None, lines_start=100, n_equal=5):


if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
encoding = clevercsv.utils.get_encoding(filename)
print(detector(filename, encoding))
else:
print(f"Usage: {sys.argv[0]} filename", file=sys.stderr)
args = parse_args()
encoding = clevercsv.utils.get_encoding(args.filename)
print(detector(args.filename, encoding, n_lines=args.n))
11 changes: 10 additions & 1 deletion comparison/detector_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"""

from utils import get_sample

from utils import get_sample, parse_args


def detector(gz_filename, encoding, n_lines=None):
Expand All @@ -18,3 +19,11 @@ def detector(gz_filename, encoding, n_lines=None):
quotechar = '"'

return dict(delimiter=",", quotechar=quotechar, escapechar="")


if __name__ == "__main__":
    # Entry point for running this detector directly from the command line.
    from clevercsv.utils import get_encoding

    cli = parse_args()
    path = cli.filename
    # detector() takes the encoding as an argument, so determine it first.
    print(detector(path, get_encoding(path), n_lines=cli.n))
9 changes: 8 additions & 1 deletion comparison/detector_sniffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import csv

from utils import DetectionError, get_sample
from utils import DetectionError, get_sample, parse_args


def detector(gz_filename, encoding, n_lines=None):
Expand All @@ -33,3 +33,10 @@ def detector(gz_filename, encoding, n_lines=None):
return None
dialect = dict(delimiter=delimiter, quotechar=quotechar, escapechar="")
return dialect

if __name__ == "__main__":
    # Script mode: detect the dialect of the file named on the command line
    # and print the result.
    from clevercsv.utils import get_encoding

    options = parse_args()
    detected = detector(
        options.filename,
        get_encoding(options.filename),
        n_lines=options.n,
    )
    print(detected)
17 changes: 16 additions & 1 deletion comparison/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
Author: Gertjan van den Burg
"""

import gzip
import argparse
import chardet
import gzip


class DetectionError(Exception):
Expand Down Expand Up @@ -63,3 +64,17 @@ def count_lines(filename, encoding):

fp.close()
return n_lines


def parse_args(args=None):
    """Parse the command line arguments used by the detector scripts.

    Parameters
    ----------
    args : list of str, optional
        Argument strings to parse. When None (the default) argparse falls
        back to ``sys.argv[1:]``, which is the behaviour existing callers
        rely on. Passing an explicit list makes it possible to call this
        function programmatically.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``n`` (int, or None when ``-n`` is not
        given) and ``filename`` (str).

    Notes
    -----
    On invalid or missing arguments argparse prints a usage message and
    exits the process (raises ``SystemExit``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n",
        help="Number of lines to use for detection",
        default=None,
        type=int,
    )
    parser.add_argument(
        "filename", help="File to detect dialect for (.csv or .csv.gz)"
    )
    # Forwarding None keeps the original behaviour of reading sys.argv.
    return parser.parse_args(args)

0 comments on commit 9b7d4e6

Please sign in to comment.