Skip to content

Commit

Permalink
Merge pull request #30 from craigds/bin-script
Browse files Browse the repository at this point in the history
Add `cchardetect` CLI script
  • Loading branch information
PyYoshi authored May 15, 2017
2 parents c991f86 + 186fb86 commit d51386c
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 2 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dist
build
eggs
parts
bin
var
sdist
develop-eggs
Expand Down
1 change: 0 additions & 1 deletion TODO.md
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
- Implement cli tool (like chardet cli)
- Improve docs
32 changes: 32 additions & 0 deletions bin/cchardetect
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python
from __future__ import print_function, unicode_literals
import argparse
import cchardet


def read_chunks(f, chunk_size):
    """Yield successive chunks read from the file-like object *f*.

    Each chunk is whatever ``f.read(chunk_size)`` returns; iteration stops
    at the first empty (falsy) read, i.e. at end of file. The final chunk
    may be shorter than *chunk_size*.
    """
    while True:
        chunk = f.read(chunk_size)
        if not chunk:
            # An empty read signals EOF.
            return
        yield chunk


def main():
    """Command-line entry point: print the detected encoding of each file.

    Parses the file arguments (opened in binary mode by argparse) and an
    optional ``--chunk-size``, feeds every file to a fresh
    ``cchardet.UniversalDetector`` chunk by chunk, and prints one
    ``<name>: <encoding> with confidence <confidence>`` line per file.
    """
    import sys  # local import: only needed to recognise the stdin stream

    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs='+', help="Files to detect encoding of", type=argparse.FileType('rb'))
    parser.add_argument('--chunk-size', type=int, default=(256 * 1024))
    args = parser.parse_args()

    # argparse.FileType maps '-' to stdin (its binary buffer on Python 3);
    # that stream is not ours to close.
    stdin_streams = (sys.stdin, getattr(sys.stdin, 'buffer', None))

    for f in args.files:
        detector = cchardet.UniversalDetector()
        try:
            for chunk in read_chunks(f, args.chunk_size):
                detector.feed(chunk)
                if detector.done:
                    # The detector has already reached a verdict; reading
                    # the remainder of the file would be wasted I/O.
                    break
        finally:
            # FileType opened every file at parse time; release each handle
            # as soon as it has been processed instead of leaking it.
            if f not in stdin_streams:
                f.close()
        detector.close()
        print('{file.name}: {result[encoding]} with confidence {result[confidence]}'.format(
            file=f,
            result=detector.result
        ))


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def read(f):
cmdclass={'build_ext': build_ext},
package_dir={'': 'src'},
packages=['cchardet', ],
scripts=['bin/cchardetect'],
ext_modules=[
cchardet_module
],
Expand Down

0 comments on commit d51386c

Please sign in to comment.