From d18bd5b75f9b7d49a3c8c105cd95362a24ec6e31 Mon Sep 17 00:00:00 2001 From: Jeremiah Gowdy Date: Sun, 4 Oct 2020 18:45:54 -0700 Subject: [PATCH] Add new byte-order-marker checker/fixer --- .pre-commit-hooks.yaml | 10 ++++++-- README.md | 7 +++--- pre_commit_hooks/fix_byte_order_marker.py | 30 +++++++++++++++++++++++ setup.cfg | 1 + tests/fix_byte_order_marker_test.py | 13 ++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 pre_commit_hooks/fix_byte_order_marker.py create mode 100644 tests/fix_byte_order_marker_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3e4dc9ea..a47f7339 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -17,8 +17,8 @@ language: python types: [python] - id: check-byte-order-marker - name: Check for byte-order marker - description: Forbid files which have a UTF-8 byte-order marker + name: 'check BOM - deprecated: use fix-byte-order-marker' + description: forbid files which have a UTF-8 byte-order marker entry: check-byte-order-marker language: python types: [text] @@ -131,6 +131,12 @@ entry: file-contents-sorter language: python files: '^$' +- id: fix-byte-order-marker + name: fix UTF-8 byte order marker + description: removes UTF-8 byte order marker + entry: fix-byte-order-marker + language: python + types: [text] - id: fix-encoding-pragma name: Fix python encoding pragma language: python diff --git a/README.md b/README.md index a6b62abd..18340bf7 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,6 @@ Require literal syntax when initializing empty or zero Python builtin types. - Ignore this requirement for specific builtin types with `--ignore=type1,type2,…`. - Forbid `dict` keyword syntax with `--no-allow-dict-kwargs`. -#### `check-byte-order-marker` -Forbid files which have a UTF-8 byte-order marker - #### `check-case-conflict` Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT. 
import argparse
from typing import Optional
from typing import Sequence

# The three bytes that encode U+FEFF in UTF-8.
_UTF8_BOM = b'\xef\xbb\xbf'


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Strip a leading UTF-8 byte-order marker from each given file.

    Files are handled purely as bytes: only the three marker bytes are
    dropped and everything after them is written back untouched.  This keeps
    line endings intact and also works for files whose remaining content is
    not valid UTF-8 (decoding such a file would raise ``UnicodeDecodeError``).

    Args:
        argv: Command-line arguments (filenames).  ``None`` makes argparse
            fall back to ``sys.argv[1:]``.

    Returns:
        1 if at least one file was rewritten, 0 otherwise.

    Raises:
        OSError: if a file cannot be opened, read, or written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    retv = 0

    for filename in args.filenames:
        with open(filename, 'rb') as f:
            # Peek at the prefix first so clean files are never read in full.
            if f.read(len(_UTF8_BOM)) != _UTF8_BOM:
                continue
            remainder = f.read()

        with open(filename, 'wb') as f:
            f.write(remainder)

        print(f'{filename}: removed byte-order marker')
        retv = 1

    return retv


if __name__ == '__main__':
    # SystemExit works even when the `site`-provided `exit` helper is absent.
    raise SystemExit(main())
from pre_commit_hooks import fix_byte_order_marker


def test_failure(tmpdir):
    """A file that starts with a UTF-8 BOM is fixed and the hook returns 1."""
    f = tmpdir.join('f.txt')
    f.write_text('ohai', encoding='utf-8-sig')
    assert fix_byte_order_marker.main((str(f),)) == 1
    # The hook is a fixer, so check the marker is really gone from disk,
    # not merely that a non-zero status was reported.
    assert f.read_binary() == b'ohai'


def test_success(tmpdir):
    """A file without a BOM is left untouched and the hook returns 0."""
    f = tmpdir.join('f.txt')
    f.write_text('ohai', encoding='utf-8')
    assert fix_byte_order_marker.main((str(f),)) == 0
    # A clean file must not be rewritten.
    assert f.read_binary() == b'ohai'