From 2c7deb6720353ee1b17404ec69d826491ed5cfb1 Mon Sep 17 00:00:00 2001 From: John Watson Date: Fri, 24 Feb 2017 10:46:39 +0000 Subject: [PATCH 1/4] Add a regex combinator --- bower.json | 3 +- src/Text/Parsing/StringParser/String.purs | 46 +++++++++++++++++++++-- test/Main.purs | 3 +- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/bower.json b/bower.json index 1bde0f7..36c1924 100644 --- a/bower.json +++ b/bower.json @@ -26,7 +26,8 @@ "purescript-foldable-traversable": "^2.0.0", "purescript-either": "^2.0.0", "purescript-lists": "^3.1.0", - "purescript-tailrec": "^2.0.0" + "purescript-tailrec": "^2.0.0", + "purescript-stringutils": "^0.0.4" }, "devDependencies": { "purescript-math": "^2.0.0", diff --git a/src/Text/Parsing/StringParser/String.purs b/src/Text/Parsing/StringParser/String.purs index f45f1ca..d21a3b3 100644 --- a/src/Text/Parsing/StringParser/String.purs +++ b/src/Text/Parsing/StringParser/String.purs @@ -15,17 +15,22 @@ module Text.Parsing.StringParser.String , upperCaseChar , anyLetter , alphaNum + , regex' + , regex ) where import Prelude import Control.Alt ((<|>)) -import Data.Array ((..)) +import Data.Array ((..), uncons) import Data.Char (toCharCode) import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldMap, elem, notElem) -import Data.Maybe (Maybe(..)) -import Data.String (Pattern(..), charAt, length, indexOf', singleton) +import Data.Maybe (Maybe(..), fromMaybe) +import Data.String (Pattern(..), charAt, drop, length, indexOf', singleton) +import Data.String.Utils (startsWith) +import Data.String.Regex as Regex +import Data.String.Regex.Flags (noFlags) import Text.Parsing.StringParser (Parser(..), ParseError(..), try, fail) import Text.Parsing.StringParser.Combinators (many, ()) @@ -111,3 +116,38 @@ anyLetter = lowerCaseChar <|> upperCaseChar "Expected a letter" -- | Match a letter or a number. alphaNum :: Parser Char alphaNum = anyLetter <|> anyDigit "Expected a letter or a number" + +-- | Build the regular expression from the pattern and match it, ensuring +-- | that the pattern only attempts to match from the start of the target. +regex' :: String -> Parser String +regex' pat = + case er of + Left _ -> + fail $ "Illegal regex " <> pat + Right r -> + regex r + where + pattern = + if startsWith "^" pat then + pat + else + "^" <> pat + er = Regex.regex pattern noFlags + +-- | Match the regular expression. +regex :: Regex.Regex -> Parser String +regex r = + Parser \{ str, pos } -> + let + remainder = drop pos str + in + -- reduce the possible array of matches to 0 or 1 elements to aid Array pattern matching + case uncons $ fromMaybe [] $ Regex.match r remainder of + Just { head: Just matched, tail: _ } -> + -- only accept matches at position 0 + if startsWith matched remainder then + Right { result: matched, suffix: { str, pos: pos + length matched } } + else + Left { pos, error: ParseError $ "no match - consider prefacing the pattern with '^'" } + _ -> + Left { pos, error: ParseError $ "no match" } diff --git a/test/Main.purs b/test/Main.purs index 0ebac2c..2a86173 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -15,7 +15,7 @@ import Test.Assert (assert', ASSERT, assert) import Text.Parsing.StringParser (Parser, runParser, try) import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, chainl, fix, between) import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser) -import Text.Parsing.StringParser.String (anyDigit, eof, string, anyChar) +import Text.Parsing.StringParser.String (anyDigit, eof, string, anyChar, regex') parens :: forall a. Parser a -> Parser a parens = between (string "(") (string ")") @@ -84,3 +84,4 @@ main = do assert' "tryTest "$ canParse tryTest "aacc" assert $ expectResult ('0':'1':'2':'3':'4':Nil) (many1 anyDigit) "01234/" assert $ expectResult ('5':'6':'7':'8':'9':Nil) (many1 anyDigit) "56789:" + assert $ expectResult "aaaa" (regex' "a+") "aaaab" From 656d96027e21891d940315dfeaa391ad02b4f22e Mon Sep 17 00:00:00 2001 From: John Watson Date: Sun, 26 Feb 2017 10:06:59 +0000 Subject: [PATCH 2/4] Fix after review comments Replace startsWith with stripPrefix. Remove Bower dependency. Inline the regex into the case expression. Prefix error messages with the function name for errors which indicate pattern problems All very good points. Many thanks for the review. --- bower.json | 3 +-- src/Text/Parsing/StringParser/String.purs | 26 +++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/bower.json b/bower.json index 36c1924..1bde0f7 100644 --- a/bower.json +++ b/bower.json @@ -26,8 +26,7 @@ "purescript-foldable-traversable": "^2.0.0", "purescript-either": "^2.0.0", "purescript-lists": "^3.1.0", - "purescript-tailrec": "^2.0.0", - "purescript-stringutils": "^0.0.4" + "purescript-tailrec": "^2.0.0" }, "devDependencies": { "purescript-math": "^2.0.0", diff --git a/src/Text/Parsing/StringParser/String.purs b/src/Text/Parsing/StringParser/String.purs index d21a3b3..a7c2f88 100644 --- a/src/Text/Parsing/StringParser/String.purs +++ b/src/Text/Parsing/StringParser/String.purs @@ -27,8 +27,7 @@ import Data.Char (toCharCode) import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldMap, elem, notElem) import Data.Maybe (Maybe(..), fromMaybe) -import Data.String (Pattern(..), charAt, drop, length, indexOf', singleton) -import Data.String.Utils (startsWith) +import Data.String (Pattern(..), charAt, drop, length, indexOf', singleton, stripPrefix) import Data.String.Regex as Regex import Data.String.Regex.Flags (noFlags) import Text.Parsing.StringParser (Parser(..), ParseError(..), try, fail) @@ -121,18 +120,18 @@ alphaNum = anyLetter <|> anyDigit "Expected a letter or a number" -- | that the pattern only attempts to match from the start of the target. regex' :: String -> Parser String regex' pat = - case er of + case Regex.regex pattern noFlags of Left _ -> - fail $ "Illegal regex " <> pat + fail $ "Text.Parsing.StringParser.String.regex': illegal regex " <> pat Right r -> regex r where pattern = - if startsWith "^" pat then - pat - else - "^" <> pat - er = Regex.regex pattern noFlags + case stripPrefix (Pattern "^") pat of + Nothing -> + "^" <> pat + _ -> + pat -- | Match the regular expression. regex :: Regex.Regex -> Parser String @@ -145,9 +144,10 @@ regex r = case uncons $ fromMaybe [] $ Regex.match r remainder of Just { head: Just matched, tail: _ } -> -- only accept matches at position 0 - if startsWith matched remainder then - Right { result: matched, suffix: { str, pos: pos + length matched } } - else - Left { pos, error: ParseError $ "no match - consider prefacing the pattern with '^'" } + case stripPrefix (Pattern matched) remainder of + Nothing -> + Left { pos, error: ParseError $ "Text.Parsing.StringParser.String.regex: no match - consider prefacing the pattern with '^'" } + _ -> + Right { result: matched, suffix: { str, pos: pos + length matched } } _ -> Left { pos, error: ParseError $ "no match" } From d75af2dd41bcea8cc6f1658985ee27440d8467d3 Mon Sep 17 00:00:00 2001 From: John Watson Date: Tue, 28 Feb 2017 18:31:27 +0000 Subject: [PATCH 3/4] Revert to a single function named regex --- src/Text/Parsing/StringParser/String.purs | 61 ++++++++++------------- test/Main.purs | 5 +- 2 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/Text/Parsing/StringParser/String.purs b/src/Text/Parsing/StringParser/String.purs index a7c2f88..250a980 100644 --- a/src/Text/Parsing/StringParser/String.purs +++ b/src/Text/Parsing/StringParser/String.purs @@ -15,7 +15,6 @@ module Text.Parsing.StringParser.String , upperCaseChar , anyLetter , alphaNum - , regex' , regex ) where @@ -116,38 +115,30 @@ anyLetter = lowerCaseChar <|> upperCaseChar "Expected a letter" alphaNum :: Parser Char alphaNum = anyLetter <|> anyDigit "Expected a letter or a number" --- | Build the regular expression from the pattern and match it, ensuring --- | that the pattern only attempts to match from the start of the target. -regex' :: String -> Parser String -regex' pat = - case Regex.regex pattern noFlags of - Left _ -> - fail $ "Text.Parsing.StringParser.String.regex': illegal regex " <> pat - Right r -> - regex r - where - pattern = - case stripPrefix (Pattern "^") pat of - Nothing -> - "^" <> pat - _ -> - pat - --- | Match the regular expression. -regex :: Regex.Regex -> Parser String -regex r = - Parser \{ str, pos } -> - let - remainder = drop pos str - in - -- reduce the possible array of matches to 0 or 1 elements to aid Array pattern matching - case uncons $ fromMaybe [] $ Regex.match r remainder of - Just { head: Just matched, tail: _ } -> - -- only accept matches at position 0 - case stripPrefix (Pattern matched) remainder of - Nothing -> - Left { pos, error: ParseError $ "Text.Parsing.StringParser.String.regex: no match - consider prefacing the pattern with '^'" } - _ -> - Right { result: matched, suffix: { str, pos: pos + length matched } } +-- | match the regular expression +regex :: String -> Parser String +regex pat = + case Regex.regex pattern noFlags of + Left _ -> + fail $ "Text.Parsing.StringParser.String.regex': illegal regex " <> pat + Right r -> + matchRegex r + where + -- ensure the pattern only matches the current position in the parse + pattern = + case stripPrefix (Pattern "^") pat of + Nothing -> + "^" <> pat _ -> - Left { pos, error: ParseError $ "no match" } + pat + matchRegex :: Regex.Regex -> Parser String + matchRegex r = + Parser \{ str, pos } -> + let + remainder = drop pos str + in + case uncons $ fromMaybe [] $ Regex.match r remainder of + Just { head: Just matched, tail: _ } -> + Right { result: matched, suffix: { str, pos: pos + length matched } } + _ -> + Left { pos, error: ParseError $ "no match" } diff --git a/test/Main.purs b/test/Main.purs index 2a86173..e68ef2f 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -15,7 +15,7 @@ import Test.Assert (assert', ASSERT, assert) import Text.Parsing.StringParser (Parser, runParser, try) import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, chainl, fix, between) import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser) -import Text.Parsing.StringParser.String (anyDigit, eof, string, anyChar, regex') +import Text.Parsing.StringParser.String (anyDigit, eof, string, anyChar, regex) parens :: forall a. Parser a -> Parser a parens = between (string "(") (string ")") @@ -48,6 +48,7 @@ exprTest = buildExprParser [ [Infix (string "/" >>= \_ -> pure div) AssocRight] ] digit tryTest :: Parser String + -- reduce the possible array of matches to 0 or 1 elements to aid Array pattern matching tryTest = try ((<>) <$> string "aa" <*> string "bb") <|> (<>) <$> string "aa" <*> string "cc" @@ -84,4 +85,4 @@ main = do assert' "tryTest "$ canParse tryTest "aacc" assert $ expectResult ('0':'1':'2':'3':'4':Nil) (many1 anyDigit) "01234/" assert $ expectResult ('5':'6':'7':'8':'9':Nil) (many1 anyDigit) "56789:" - assert $ expectResult "aaaa" (regex' "a+") "aaaab" + assert $ expectResult "aaaa" (regex "a+") "aaaab" From cf2e8974a37f5050a5814e15a20c48ecf4a33cb2 Mon Sep 17 00:00:00 2001 From: John Watson Date: Wed, 1 Mar 2017 08:40:49 +0000 Subject: [PATCH 4/4] Remove superfluous $ --- src/Text/Parsing/StringParser/String.purs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Parsing/StringParser/String.purs b/src/Text/Parsing/StringParser/String.purs index 250a980..83ab9b7 100644 --- a/src/Text/Parsing/StringParser/String.purs +++ b/src/Text/Parsing/StringParser/String.purs @@ -141,4 +141,4 @@ regex pat = Just { head: Just matched, tail: _ } -> Right { result: matched, suffix: { str, pos: pos + length matched } } _ -> - Left { pos, error: ParseError $ "no match" } + Left { pos, error: ParseError "no match" }