-
Notifications
You must be signed in to change notification settings - Fork 0
/
eatwhite.py
200 lines (160 loc) · 7.18 KB
/
eatwhite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!python
# Copyright 2010-2019 by David McAllister
from __future__ import print_function
import sys
# Refuse to run on interpreters older than Python 3.5: print a hint and stop
# with exit status 1.
if sys.hexversion < 0x03050000:
    print('Python 3.5 or newer is required. Download it from python.org.')
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module and is not available under "python -S" or when frozen.
    sys.exit(1)
import sys
import argparse
def red(text, **kwargs):
    '''Print text in red using ANSI escape codes, then reset the colour.

    Any extra keyword arguments (for example end or file) are forwarded to print().
    '''
    colour_on, colour_off = '\033[31m', '\033[0m'
    print(colour_on, text, colour_off, sep='', **kwargs)
def green(text, **kwargs):
    '''Print text in green using ANSI escape codes, then reset the colour.

    Any extra keyword arguments (for example end or file) are forwarded to print().
    '''
    colour_on, colour_off = '\033[32m', '\033[0m'
    print(colour_on, text, colour_off, sep='', **kwargs)
def yellow(text, **kwargs):
    '''Print text in yellow using ANSI escape codes, then reset the colour.

    Any extra keyword arguments (for example end or file) are forwarded to print().
    '''
    colour_on, colour_off = '\033[33m', '\033[0m'
    print(colour_on, text, colour_off, sep='', **kwargs)
def replaceLoop(content, oldt, newt, verbose = False, printNL = None):
    '''Replace oldt with newt in content over and over until no oldt remains.

    A single replace() pass can leave new matches behind (for example
    collapsing a run of blank lines), so the pass is repeated until
    content.count(oldt) reaches zero.

    content: the data to rewrite (the callers in this file pass bytes).
    oldt: the pattern to remove.
    newt: the replacement for each occurrence of oldt.
    verbose: when true, print the match count before every pass and the
        final count afterwards.
    printNL: passed as end= to the final verbose print.

    Returns the rewritten content.
    Raises ValueError when content contains oldt and oldt is empty or occurs
    inside newt, because every pass would then recreate a match and the loop
    could never finish.
    '''
    replCnt = content.count(oldt)
    # Guard only when there is something to replace, so a call that finds no
    # match keeps returning content unchanged whatever the arguments are.
    if replCnt > 0 and (len(oldt) == 0 or oldt in newt):
        raise ValueError('replaceLoop would never terminate: oldt is empty or occurs in newt')
    while replCnt > 0:
        if verbose:
            print(replCnt, end=' ')
        content = content.replace(oldt, newt)
        replCnt = content.count(oldt)
    if verbose:
        print(replCnt, end=printNL)
    return content
def warnUnicode(content):
    '''Print a "Bad byte" line for every byte in content whose value is 128 or more.

    Each report shows the byte value and its offset within content. The first
    report is preceded by a newline; later ones are not.
    '''
    reported = False
    for offset, value in enumerate(content):
        if value < 128:
            continue
        lead = '' if reported else '\n'
        print(lead, 'Bad byte:', value, 'offset:', offset)
        reported = True
def findWavy(content):
    '''Remove the blank line directly above a "}" indented by 0, 4, 8, 12, 16 or 20 spaces.

    Each indent width is handled with one replace() pass; the updated bytes
    are returned.
    '''
    for indent_width in (0, 4, 8, 12, 16, 20):
        closer = b' ' * indent_width + b'}'
        content = content.replace(b'\n\n' + closer, b'\n' + closer)
    return content
def fixFileWhitespace(file_path, doCRLF, doWrite, doCollapseSpaces, nlPerParaIn, nlPerParaOut, verbose, printNL):
    '''Get rid of all whitespace issues in the given file, with modes for source code and text files.

    Can convert line endings to LF or CRLF. Can specify how to do paragraph endings.

    file_path: file to clean; it is read, and possibly rewritten, in binary mode.
    doCRLF: when true the result uses CRLF line endings, otherwise LF.
    doWrite: when true a changed result is written back to file_path;
        otherwise the change is only reported.
    doCollapseSpaces: when true runs of spaces are collapsed to one space and
        a space at the start of a line is removed.
    nlPerParaIn: when > 0, paragraph mode is used and this many consecutive
        newlines in the input form one paragraph break.
    nlPerParaOut: number of newlines written per paragraph break in paragraph mode.
    verbose: when true, print counts and progress information.
    printNL: passed as end= to the verbose summary prints and to replaceLoop.

    Returns None. The outcome is reported on stdout.
    '''
    with open(file_path, 'rb') as open_file:
        content = open_file.read()
    orig_content = content

    if verbose:
        print(
            content.count(b'\r\n'), 'CRLF,',
            content.count(b'\r'), 'CR,',
            content.count(b'\n'), 'LF,',
            content.count(b'\t'), 'TAB.', end=printNL)

    if not content:
        print('Empty file')
        return

    # Applied strictly in this order: the longer CR/LF sequences must be
    # handled before the shorter ones they contain.
    substitutions = (
        (b'\xef\xbb\xbf', b''),     # Remove unicode byte order mark
        (b'\xe2\x80\x9c', b'"'),    # Unicode quotes
        (b'\xe2\x80\x9d', b'"'),    # Unicode quotes
        (b'\xe2\x80\x99', b'\''),   # Unicode quotes
        (b'\xe2\x80\x93', b'-'),    # Unicode hyphen
        (b'\xc2\x85', b'\n'),       # Unicode new line
        (b'\r\r\n', b'\n'),         # Workaround clang-format 8.0.1 bug that does this
        (b'\r\n', b'\n'),           # Replace CRLF with LF
        (b'\r', b'\n'),             # Replace rogue CR with LF
        (b'\t', b' '),              # Replace tab with space
    )
    for old, new in substitutions:
        content = content.replace(old, new)

    if verbose:
        print('Trailing space lines: ', end='')
    content = replaceLoop(content, b' \n', b'\n', verbose, printNL)  # Remove trailing spaces

    # Search for any remaining unicode characters
    warnUnicode(content)

    content = findWavy(content)

    # Do cleanup for .txt files that don't apply to code
    if nlPerParaIn > 0:
        # Paragraph Mode
        # A paragraph break in input text is defined as nlPerParaIn newlines.
        # Replace these with \r as a temporary paragraph break (since \r is now not in content).
        content = content.replace(b'\n' * nlPerParaIn, b'\r')

        # There may be remaining \n at end of file that were not enough to form a paragraph break.
        # Remove file end whitespace. rstrip() is safe even if nothing is left,
        # whereas indexing content[-1] raised IndexError on whitespace-only input.
        content = content.rstrip(b'\n ')

        # Remaining \n are line breaks within a paragraph. Replace with spaces.
        content = content.replace(b'\n', b' ')

        if verbose:
            print('Multiple paragraph breaks: ', end='')
        content = replaceLoop(content, b'\r\r', b'\r', verbose, printNL)  # Collapse multiple paragraph breaks into one
    else:
        content = replaceLoop(content, b'\n\n\n', b'\n\n', verbose, printNL)  # Collapse multiple blank lines into one

    if doCollapseSpaces:
        if verbose:
            print('Multiple spaces: ', end='')
        # Replace TWO spaces with one until no double space is left. Replacing a
        # single space with a single space would never reduce the match count.
        content = replaceLoop(content, b' ' * 2, b' ', verbose, printNL)  # Remove multiple spaces.

    if nlPerParaIn > 0:
        # Paragraph Mode
        content = content.replace(b'\r', b'\n' * nlPerParaOut)  # Convert paragraph breaks back to nlPerParaOut newlines

    # Make sure file ends with one newline (endswith() also copes with empty content)
    if not content.endswith(b'\n'):
        content = content + b'\n'

    # Remove paragraph indents
    if doCollapseSpaces:
        content = content.replace(b'\n ', b'\n')  # No need to loop this since spaces have been collapsed

    # Remove file leading whitespace. A file holding nothing but whitespace ends
    # up empty here instead of raising IndexError on content[0].
    content = content.lstrip(b'\n ')

    # As we finish, switch to CRLF if desired
    if doCRLF:
        if verbose:
            print('To CRLF', end=printNL)
        content = content.replace(b'\n', b'\r\n')  # Replace LF with CRLF

    if verbose:
        print('Orig length:', len(orig_content), 'new length:', len(content), end=printNL)

    if content == orig_content:
        green('No whitespace changes needed.', end='')
    elif doWrite:
        with open(file_path, 'wb') as open_file:
            open_file.write(content)
        yellow('Saved whitespace changes.', end='')
    else:
        red('Whitespace changes not saved.', end='')
def main():
    '''Parse the command line and run fixFileWhitespace on every file named there.

    Prints each file name before processing it and a blank separator after it.
    Invalid arguments, including non-integer values for -p/--paragraph, are
    reported by argparse, which exits with a usage error.
    '''
    parser = argparse.ArgumentParser(
        description='eatwhite.py - Fix up CR, LF, tabs, and spaces in text files')

    parser.add_argument('-n', '--no-write',
                        action='store_true',
                        default=False,
                        help='Do not write output files with changes')
    # -r and -l both store into to_crlf, whose default is True, so CRLF output
    # is used unless -l is given.
    parser.add_argument('-r', '--to-crlf',
                        dest='to_crlf',
                        action='store_true',
                        default=True,
                        help='Convert all lines to CRLF')
    parser.add_argument('-l', '--to-lf',
                        dest='to_crlf',
                        action='store_false',
                        default=True,
                        help='Convert all lines to LF')
    parser.add_argument('-c', '--collapse',
                        action='store_true',
                        default=False,
                        help='Collapse multiple spaces; remove leading spaces')
    # type=int makes argparse validate NIN/NOUT itself, so a non-numeric value
    # gives a usage message instead of a ValueError traceback later on.
    parser.add_argument('-p', '--paragraph',
                        nargs=2,
                        type=int,
                        metavar=('NIN', 'NOUT'),
                        dest='para',
                        default=(0, 0),
                        help='Convert paragraph breaks from NIN newlines to NOUT newlines; convert newlines in paragraphs to spaces; collapse multiple newlines')
    parser.add_argument('-q', '--quiet',
                        action='store_true',
                        default=False,
                        help='Say little')
    parser.add_argument('fname',
                        nargs='+',
                        help='Files to convert')

    args = parser.parse_args()
    nlPerParaIn, nlPerParaOut = args.para

    for fname in args.fname:
        print(fname)
        fixFileWhitespace(fname, args.to_crlf, not args.no_write, args.collapse,
                          nlPerParaIn, nlPerParaOut, not args.quiet, '\n')
        print('\n')
# Script entry point: run the command-line tool only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()