forked from rust-lang/book
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add spellchecker script and dictionary of valid words for it
Fixes rust-lang#118
- Loading branch information
Showing
3 changed files
with
290 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
personal_ws-1.1 en 0 utf-8 | ||
abcabcabc | ||
abcd | ||
Addr | ||
alignof | ||
APIs | ||
aren | ||
backtrace | ||
BACKTRACE | ||
benchmarking | ||
bitand | ||
BitAnd | ||
bitor | ||
BitOr | ||
bitwise | ||
Bitwise | ||
bitxor | ||
BitXor | ||
bool | ||
boolean | ||
booleans | ||
Bors | ||
BuildHasher | ||
Cagain | ||
callsite | ||
CamelCase | ||
ChangeColor | ||
ChangeColorMessage | ||
chXX | ||
chYY | ||
config | ||
Config | ||
const | ||
copyeditor | ||
couldn | ||
cratesio | ||
cryptographically | ||
CStr | ||
CString | ||
ctrl | ||
Ctrl | ||
deallocated | ||
debuginfo | ||
deref | ||
Deref | ||
dereference | ||
Dereference | ||
dereferencing | ||
DerefMut | ||
destructure | ||
destructuring | ||
Destructuring | ||
didn | ||
Dobrý | ||
doccargo | ||
doccratesio | ||
doesn | ||
else's | ||
encodings | ||
enum | ||
Enum | ||
enums | ||
enum's | ||
Enums | ||
ErrorKind | ||
Executables | ||
extern | ||
FFFF | ||
filename | ||
Filename | ||
filesystem | ||
Filesystem | ||
formatter | ||
gitignore | ||
grapheme | ||
Grapheme | ||
growable | ||
hardcoded | ||
hardcoding | ||
hasher | ||
HashMap | ||
Haskell | ||
hasn | ||
helloworld | ||
Hmmm | ||
Hoare | ||
Hola | ||
html | ||
impl | ||
init | ||
instantiation | ||
InvalidDigit | ||
ioerror | ||
iokind | ||
ioresult | ||
iostdin | ||
IpAddr | ||
IpAddrKind | ||
irst | ||
isize | ||
iter | ||
judgement | ||
lang | ||
libc | ||
libcore | ||
libreoffice | ||
lifecycle | ||
loopback | ||
lval | ||
mathematic | ||
metaprogramming | ||
mibbit | ||
Mibbit | ||
mkdir | ||
monomorphization | ||
Monomorphization | ||
monomorphized | ||
MoveMessage | ||
Mutex | ||
namespace | ||
namespaced | ||
namespaces | ||
Noooooooooooooooooooooo | ||
NotFound | ||
null's | ||
OCaml | ||
offsetof | ||
OptionalFloatingPointNumber | ||
OptionalNumber | ||
OsStr | ||
OsString | ||
overread | ||
parameterize | ||
ParseIntError | ||
PartialEq | ||
PartialOrd | ||
powi | ||
preprocessing | ||
Preprocessing | ||
preprocessor | ||
println | ||
priv | ||
proc | ||
QuitMessage | ||
RAII | ||
randcrate | ||
READMEs | ||
rect | ||
Rectange | ||
redeclaring | ||
RefCell | ||
repr | ||
runtime | ||
Rustacean | ||
Rustaceans | ||
rustc | ||
rustdoc | ||
rustup | ||
semver | ||
SemVer | ||
shouldn | ||
sizeof | ||
someproject | ||
someusername | ||
SpreadsheetCell | ||
sqrt | ||
stackoverflow | ||
stdin | ||
Stdin | ||
stdlib | ||
struct | ||
Struct | ||
structs | ||
struct's | ||
Structs | ||
submodule | ||
submodules | ||
suboptimal | ||
subtree | ||
That'd | ||
TODO | ||
toml | ||
TOML | ||
tradeoff | ||
TrafficLight | ||
trpl | ||
typeof | ||
UFCS | ||
unary | ||
Unary | ||
unoptimized | ||
unsized | ||
USERPROFILE | ||
usize | ||
UsState | ||
Versioning | ||
wasn | ||
whitespace | ||
workspace | ||
workspaces | ||
Workspaces | ||
wouldn | ||
WriteMessage | ||
yyyy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#!/bin/bash | ||
|
||
# Checks project markdown files for spell errors | ||
|
||
# Notes: | ||
|
||
# This script needs a dictionary file ($dict_filename) with project-specific | ||
# valid words. If this file is missing, the first invocation of the script | ||
# generates a file of words currently considered typos. The user should remove | ||
# real typos from this file and leave only valid words. When the script reports | ||
# a false positive after a source modification, the new valid word should be | ||
# added to the dictionary file. | ||
|
||
# Default mode of this script is interactive. Each source file is scanned for | ||
# typos. aspell opens window, suggesting fixes for each found typo. Original | ||
# files with errors will be backed up to files with format "filename.md.bak". | ||
|
||
# When running in CI, this script should be run in "list" mode (pass "list" | ||
# as first argument). In this mode script scans all files and reports found | ||
# errors. Exit code in this case depends on scan result: | ||
# 1 if any errors found, | ||
# 0 if all is clear. | ||
|
||
# Script skips words with length less than or equal to 3. This helps to avoid | ||
# some false positives. | ||
|
||
# We can consider skipping source code in markdown files (```code```) to reduce | ||
# rate of false positives, but then we lose ability to detect typos in code | ||
# comments/strings etc. | ||
|
||
# Spell-check driver.
#
# Arguments:
#   $1 - optional; "list" selects the non-interactive list mode, anything
#        else (or nothing) selects the interactive "check" mode.
# Exit status:
#   list mode: 1 if aspell reported any word, 0 otherwise.
#   2 on setup problems (no markdown sources when generating the dictionary,
#   or the temporary dictionary copy could not be created).

# With nullglob, a pattern that matches nothing expands to an empty list
# instead of staying in the array as the literal pattern string.
shopt -s nullglob

dict_filename=dictionary.txt
markdown_sources=(./src/*.md)
mode="check"

if [[ "${1:-}" == "list" ]]; then
  mode="list"
fi

if [[ ! -f "$dict_filename" ]]; then
  # Pre-check mode: generates a dictionary of the words aspell considers
  # typos. After the user validates that this file contains only valid words,
  # we can look for typos using this dictionary and the default aspell
  # dictionary.

  # Without any source file `cat` would get no operands and read stdin,
  # so refuse to continue instead of hanging or writing a bogus dictionary.
  if (( ${#markdown_sources[@]} == 0 )); then
    echo "No markdown files found in ./src; cannot generate '$dict_filename'." >&2
    exit 2
  fi

  echo "Scanning files to generate dictionary file '$dict_filename'."
  echo "Please check it doesn't contain any misspellings for correct results."

  # First line is the header of an aspell personal word list.
  echo "personal_ws-1.1 en 0 utf-8" > "$dict_filename"
  cat "${markdown_sources[@]}" | aspell list | sort -u >> "$dict_filename"
else
  # aspell repeatedly modifies the personal dictionary it is given, so we
  # work on a private copy. mktemp gives an unpredictable name (the fixed
  # /tmp/<name> used before could be clobbered by another user or run), and
  # the EXIT trap removes the copy on every exit path.
  dict_path=$(mktemp "${TMPDIR:-/tmp}/${dict_filename}.XXXXXX") || exit 2
  trap 'rm -f -- "$dict_path"' EXIT
  cp -- "$dict_filename" "$dict_path" || exit 2

  if [[ "$mode" == "list" ]]; then
    # List mode (selected with the "list" argument): scan all files and
    # report errors without prompting.
    declare -i retval=0

    for fname in "${markdown_sources[@]}"; do
      # sort -u: aspell prints a word once per occurrence; one grep per
      # distinct word is enough to show every line that contains it.
      misspelled=$(aspell --ignore 3 --personal="$dict_path" "$mode" < "$fname" | sort -u)
      if [[ -n "$misspelled" ]]; then
        while IFS= read -r word; do
          # An empty fixed-string pattern would match every line.
          [[ -n "$word" ]] || continue
          # TODO: Find a more correct way to get the line number
          # (ideally from aspell). Now it can make some false positives,
          # because it is just a grep.
          # -F/--: treat the word literally, never as a regex or an option.
          grep --with-filename --line-number --color=always -F -- "$word" "$fname"
        done <<< "$misspelled"
        retval=1
      fi
    done
    exit "$retval"
  elif [[ "$mode" == "check" ]]; then
    # Interactive mode (default): aspell prompts for a fix for each typo.
    for fname in "${markdown_sources[@]}"; do
      aspell --ignore 3 --personal="$dict_path" "$mode" "$fname"
    done
  fi
fi