-
Notifications
You must be signed in to change notification settings - Fork 1
/
Titlecase.regex
76 lines (68 loc) · 2 KB
/
Titlecase.regex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Alternation of the short words (a, an, and, as, at, but, by, en, for, if,
# in, of, on, or, the, to, v, via, vs) that the other expressions in this
# file pull in by name.
# - The lookbehind at the front belongs to the first alternative only: "a" is
#   rejected when directly preceded by "q&" (as in "q&a").
# - The lookahead after "at" belongs to that alternative only: "at" is
#   rejected when directly followed by "&t" (as in "at&t").
# - "v" and "vs" accept an optional trailing period.
# The alternation is neither grouped nor case-folded here; every use visible
# in this file wraps it in its own group and enables (?i) itself.
smallWordExpression = {
(?<!q&)a|an|and|as|at(?!&t)|but|by|en|for|if|in|of|on|or|the|to|v[.]?|via|vs[.]?
}
# Optional suffix: a straight or curly apostrophe followed by zero or more
# lowercase letters (for example the 's in "it's"). The whole group is
# optional, so it also matches the empty string.
# NOTE(review): the spaces inside the pattern are presumably insignificant
# (extended mode), as in the expressions that include it; confirm against
# the code that loads this file.
apostropheExpression = {
(?: ['’] [[:lower:]]* )?
}
# Matches one word-like token and classifies it by which capture group
# participates in the match:
#   1: leading underscores        2: file path, URL, domain, or email
#   3: small word                 4: word without internal capitals
#   5: any other word             6: trailing underscores
# The alternatives are tried in the order written, so an earlier kind wins
# over a later one. What is done with each group is decided by the calling
# code, which is not part of this file.
titlecaseExpression = {
\b (_*) (?:
(
# File path: letters directly after a space and a slash or backslash,
# then one or more letters, hyphens, underscores, slashes, or backslashes
(?<=[\ ][/\\]) [[:alpha:]]+ [-_[:alpha:]/\\]+
# or URL, domain, or email: a run of letters, hyphens, or underscores, then
# one of @ . : and a further run that may also contain @ . : /
| [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ $apostropheExpression
)
# or small word (case-insensitive)
| ( (?i: $smallWordExpression ) $apostropheExpression )
# or word w/o internal caps: any letter, then only lowercase letters,
# apostrophes, or bracket characters
| ( [[:alpha:]] [[:lower:]'’()\[\]{}]* $apostropheExpression )
# or some other word: same shape, but later letters may be of either case
| ( [[:alpha:]] [[:alpha:]'’()\[\]{}]* $apostropheExpression )
) (_*) \b
}
# Exceptions for small words
## Capitalize at start of title
# Finds a small word that opens the title, a subsentence, or an inserted
# subphrase. Group 1 is the context that precedes the word, group 2 is the
# small word itself. Matching is case-insensitive.
startExceptionExpression = {
(?i)
# Start of title, allowing any leading punctuation
( \A [[:punct:]]*
# or of subsentence: one of : . ; ? ! followed by at least one space
| [:.;?!][\ ]+
# or of inserted subphrase: a space, an opening quote, parenthesis, or
# square bracket, then optional spaces
| [\ ]['"“‘(\[][\ ]* )
# followed by small word, which must end at a word boundary
( $smallWordExpression ) \b
}
## Capitalize at end of title
# Finds a small word that closes the title or an inserted subphrase. Group 1
# is the small word; what follows it is only checked by a lookahead and is
# not part of the match. Matching is case-insensitive.
endExceptionExpression = {
(?i)
# Small word, starting at a word boundary
\b ( $smallWordExpression )
# at the end of the title, allowing any trailing punctuation
(?= [[:punct:]]* \Z
# or of an inserted subphrase: a closing quote, parenthesis, or square
# bracket followed by a space
| ['"’”)\]] [\ ] )
}
# Exceptions for small words in hyphenated compound words
## "in-flight" -> "In-Flight"
# Finds a small word that is the first element of a hyphenated compound.
# Group 1 is the small word; the hyphen and following letters are only
# checked by a lookahead and are not part of the match. Matching is
# case-insensitive.
startHyphenatedCompoundExpression = {
(?i)
\b
# Negative lookbehind for a hyphen; we don't want to match man-in-the-middle but do want (in-flight)
(?<! -)
( $smallWordExpression )
# Lookahead for "-someword": a hyphen followed by one or more letters
(?= -[[:alpha:]]+)
}
## "Stand-in" -> "Stand-In" (Stand is already capped at this point)
# Finds a small word that is the last element of a hyphenated compound.
# Group 1 is the preceding word with its hyphen, group 2 is the small word.
# Matching is case-insensitive.
endHyphenatedCompoundExpression = {
(?i)
\b
# Negative lookbehind for the ellipsis character.
# NOTE(review): unlike startHyphenatedCompoundExpression, this does not
# reject a preceding hyphen. man-in-the-middle still fails to match here
# because of the negative lookahead for '-' at the end, while (stand-in)
# matches. Confirm that the ellipsis is what was intended.
(?<!…)
# $1 = first word and hyphen, should already be properly capped
( [[:alpha:]]+- )
# $2 = the small word.
# NOTE(review): no word boundary follows it, so it can also match the
# leading letters of a longer word; confirm that this is intended.
( $smallWordExpression )
# Negative lookahead for another '-'
(?! - )
}