@@ -14,16 +14,18 @@ import Text.ParserCombinators.Parsec((<|>), (<?>),
14
14
unexpected , try , runParser , many , getState , setState , CharParser , ParseError ,
15
15
sepBy1 , option , notFollowedBy , many1 , lookAhead , eof , between ,
16
16
string , noneOf , digit , char , anyChar )
17
+
18
+ import Control.Applicative (asum )
17
19
import Control.Monad (liftM , when , guard )
18
20
import qualified Data.Set as Set (fromList )
19
21
20
22
-- | An element inside @[...]@, denoting a character class.
21
23
data BracketElement
22
- = BEChar Char -- ^ A single character.
23
- | BEChars String -- ^ A sequence of characters expanded from a range (e.g. @a-z@).
24
- | BEColl String -- ^ @foo@ in @[.foo.]@.
25
- | BEEquiv String -- ^ @bar@ in @[=bar=]@.
26
- | BEClass String -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@.
24
+ = BEChar Char -- ^ A single character.
25
+ | BERange Char Char -- ^ A character range (e.g. @a-z@).
26
+ | BEColl String -- ^ @foo@ in @[.foo.]@.
27
+ | BEEquiv String -- ^ @bar@ in @[=bar=]@.
28
+ | BEClass String -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@.
27
29
28
30
-- | Return either an error message or a tuple of the Pattern and the
29
31
-- largest group index and the largest DoPa index (both have smallest
@@ -121,9 +123,10 @@ p_set invert = do initial <- (option "" ((char ']' >> return "]") <|> (char '-'
121
123
values <- if null initial then many1 p_set_elem else many p_set_elem
122
124
_ <- char ' ]'
123
125
ci <- char_index
124
- let chars = maybe'set $ initial
125
- ++ [c | BEChar c <- values ]
126
- ++ concat [s | BEChars s <- values ]
126
+ let chars = maybe'set $ concat $
127
+ initial :
128
+ [ c | BEChar c <- values ] :
129
+ [ [start.. end] | BERange start end <- values ]
127
130
colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ]
128
131
equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values]
129
132
class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values]
@@ -134,8 +137,14 @@ p_set invert = do initial <- (option "" ((char ']' >> return "]") <|> (char '-'
134
137
-- From here down the code is the parser and functions for pattern [ ] set things
135
138
136
139
p_set_elem :: P BracketElement
137
- p_set_elem = p_set_elem_class <|> p_set_elem_equiv <|> p_set_elem_coll
138
- <|> p_set_elem_range <|> p_set_elem_char <?> " Failed to parse bracketed string"
140
+ p_set_elem = checkBracketElement =<< asum
141
+ [ p_set_elem_class
142
+ , p_set_elem_equiv
143
+ , p_set_elem_coll
144
+ , p_set_elem_range
145
+ , p_set_elem_char
146
+ , fail " Failed to parse bracketed string"
147
+ ]
139
148
140
149
p_set_elem_class :: P BracketElement
141
150
p_set_elem_class = liftM BEClass $
@@ -154,10 +163,7 @@ p_set_elem_range = try $ do
154
163
start <- noneOf " ]-"
155
164
_ <- char ' -'
156
165
end <- noneOf " ]"
157
- -- bug fix: check start <= end before "return (BEChars [start..end])"
158
- if start <= end
159
- then return (BEChars [start.. end])
160
- else unexpected " End point of dashed character range is less than starting point"
166
+ return $ BERange start end
161
167
162
168
p_set_elem_char :: P BracketElement
163
169
p_set_elem_char = do
@@ -167,3 +173,23 @@ p_set_elem_char = do
167
173
when (not atEnd) (unexpected " A dash is in the wrong place in a bracket" )
168
174
return (BEChar c)
169
175
176
+ -- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@.
177
+ -- This failure should not be caught.
178
+ --
179
+ checkBracketElement :: BracketElement -> P BracketElement
180
+ checkBracketElement e =
181
+ case e of
182
+ BERange start end
183
+ | start > end -> fail $ unwords
184
+ [ " End point"
185
+ , show end
186
+ , " of dashed character range is less than starting point"
187
+ , show start
188
+ ]
189
+ | otherwise -> ok
190
+ BEChar _ -> ok
191
+ BEClass _ -> ok
192
+ BEColl _ -> ok
193
+ BEEquiv _ -> ok
194
+ where
195
+ ok = return e
0 commit comments