Skip to content

Commit 6578f80

Browse files
committed
ReadRegex: Raise correct error for empty char range (see #1)
1 parent e5db8e2 commit 6578f80

File tree

1 file changed

+40
-14
lines changed

1 file changed

+40
-14
lines changed

lib/Text/Regex/TDFA/ReadRegex.hs

+40-14
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,18 @@ import Text.ParserCombinators.Parsec((<|>), (<?>),
1414
unexpected, try, runParser, many, getState, setState, CharParser, ParseError,
1515
sepBy1, option, notFollowedBy, many1, lookAhead, eof, between,
1616
string, noneOf, digit, char, anyChar)
17+
18+
import Control.Applicative (asum)
1719
import Control.Monad(liftM, when, guard)
1820
import qualified Data.Set as Set(fromList)
1921

2022
-- | An element inside @[...]@, denoting a character class.
2123
data BracketElement
22-
= BEChar Char -- ^ A single character.
23-
| BEChars String -- ^ A sequence of characters expanded from a range (e.g. @a-z@).
24-
| BEColl String -- ^ @foo@ in @[.foo.]@.
25-
| BEEquiv String -- ^ @bar@ in @[=bar=]@.
26-
| BEClass String -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@.
24+
= BEChar Char -- ^ A single character.
25+
| BERange Char Char -- ^ A character range (e.g. @a-z@).
26+
| BEColl String -- ^ @foo@ in @[.foo.]@.
27+
| BEEquiv String -- ^ @bar@ in @[=bar=]@.
28+
| BEClass String -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@.
2729

2830
-- | Return either an error message or a tuple of the Pattern and the
2931
-- largest group index and the largest DoPa index (both have smallest
@@ -121,9 +123,10 @@ p_set invert = do initial <- (option "" ((char ']' >> return "]") <|> (char '-'
121123
values <- if null initial then many1 p_set_elem else many p_set_elem
122124
_ <- char ']'
123125
ci <- char_index
124-
let chars = maybe'set $ initial
125-
++ [c | BEChar c <- values ]
126-
++ concat [s | BEChars s <- values ]
126+
let chars = maybe'set $ concat $
127+
initial :
128+
[ c | BEChar c <- values ] :
129+
[ [start..end] | BERange start end <- values ]
127130
colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ]
128131
equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values]
129132
class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values]
@@ -134,8 +137,14 @@ p_set invert = do initial <- (option "" ((char ']' >> return "]") <|> (char '-'
134137
-- From here down the code is the parser and functions for pattern [ ] set things
135138

136139
p_set_elem :: P BracketElement
137-
p_set_elem = p_set_elem_class <|> p_set_elem_equiv <|> p_set_elem_coll
138-
<|> p_set_elem_range <|> p_set_elem_char <?> "Failed to parse bracketed string"
140+
p_set_elem = checkBracketElement =<< asum
141+
[ p_set_elem_class
142+
, p_set_elem_equiv
143+
, p_set_elem_coll
144+
, p_set_elem_range
145+
, p_set_elem_char
146+
, fail "Failed to parse bracketed string"
147+
]
139148

140149
p_set_elem_class :: P BracketElement
141150
p_set_elem_class = liftM BEClass $
@@ -154,10 +163,7 @@ p_set_elem_range = try $ do
154163
start <- noneOf "]-"
155164
_ <- char '-'
156165
end <- noneOf "]"
157-
-- bug fix: check start <= end before "return (BEChars [start..end])"
158-
if start <= end
159-
then return (BEChars [start..end])
160-
else unexpected "End point of dashed character range is less than starting point"
166+
return $ BERange start end
161167

162168
p_set_elem_char :: P BracketElement
163169
p_set_elem_char = do
@@ -167,3 +173,23 @@ p_set_elem_char = do
167173
when (not atEnd) (unexpected "A dash is in the wrong place in a bracket")
168174
return (BEChar c)
169175

176+
-- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@.
177+
-- This failure should not be caught.
178+
--
179+
checkBracketElement :: BracketElement -> P BracketElement
180+
checkBracketElement e =
181+
case e of
182+
BERange start end
183+
| start > end -> fail $ unwords
184+
[ "End point"
185+
, show end
186+
, "of dashed character range is less than starting point"
187+
, show start
188+
]
189+
| otherwise -> ok
190+
BEChar _ -> ok
191+
BEClass _ -> ok
192+
BEColl _ -> ok
193+
BEEquiv _ -> ok
194+
where
195+
ok = return e

0 commit comments

Comments
 (0)