Skip to content

Commit eb81202

Browse files
committed
New module Text.Parsing.Parser.String.Basic
New functions - `Parser.String.Basic.number` - `Parser.String.Basic.intDecimal` Moved the `Parser.Token` parsers `digit`, `hexDigit`, `octDigit`, `upper`, `space`, `letter`, `alphaNum` into the new module `Parser.String.Basic`.
1 parent 23710ea commit eb81202

File tree

9 files changed

+188
-81
lines changed

9 files changed

+188
-81
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ New features:
1515
- `Parser.Combinators.many1Till_` (#143 by @jamesbrock)
1616
- `Parser.Combinators.manyTillRec_` (#143 by @jamesbrock)
1717
- `Parser.Combinators.many1TillRec_` (#143 by @jamesbrock)
18+
- `Parser.String.Basic.number` (#142 by @jamesbrock)
19+
- `Parser.String.Basic.intDecimal` (#142 by @jamesbrock)
1820

1921
Bugfixes:
2022

@@ -23,8 +25,11 @@ Bugfixes:
2325

2426
Other improvements:
2527

28+
- Moved the `Parser.Token` parsers `digit`, `hexDigit`, `octDigit`, `upper`,
29+
`space`, `letter`, `alphaNum` into the new module `Parser.String.Basic`. (#142 by @jamesdbrock)
2630
- Documentation. (#140 by @jamesdbrock)
2731
- Documentation. (#143 by @jamesdbrock)
32+
- Documentation. (#142 by @jamesdbrock)
2833

2934
## [v8.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.1.0) - 2022-01-10
3035

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ and then the parser will succeed and return `Right true`.
7979

8080
### More parsers
8181

82-
There are other `String` parsers in the module `Text.Parsing.Parser.Token`, for example the parser `letter :: Parser String Char` which will accept any single alphabetic letter.
82+
There are other `String` parsers in the module `Text.Parsing.Parser.String.Basic`, for example the parser `letter :: Parser String Char` which will accept any single alphabetic letter.
8383

8484
### Parser combinators
8585

bench/Main.purs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ import Effect.Unsafe (unsafePerformEffect)
5555
import Performance.Minibench (benchWith)
5656
import Text.Parsing.Parser (Parser, runParser)
5757
import Text.Parsing.Parser.String (string)
58-
import Text.Parsing.Parser.Token (digit)
58+
import Text.Parsing.Parser.String.Basic (digit)
5959
import Text.Parsing.StringParser as StringParser
6060
import Text.Parsing.StringParser.CodePoints as StringParser.CodePoints
6161
import Text.Parsing.StringParser.CodeUnits as StringParser.CodeUnits

src/Text/Parsing/Parser/Expr.purs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
-- | This module is a port of the Haskell
2+
-- | [__Text.Parsec.Expr__](https://hackage.haskell.org/package/docs/Text-Parsec-Expr.html)
3+
-- | module.
4+
15
module Text.Parsing.Parser.Expr
26
( Assoc(..)
37
, Operator(..)

src/Text/Parsing/Parser/Indent.purs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
-- | This is purescript-port of Text.Parsing.Indent
2-
-- | <https://hackage.haskell.org/package/indents-0.3.3/docs/Text-Parsec-Indent.html>, 05.07.2016.
1+
-- | This module is a port of the Haskell
2+
-- | [__Text.Parsec.Indent__](https://hackage.haskell.org/package/indents-0.3.3/docs/Text-Parsec-Indent.html)
3+
-- | module from 2016-05-07.
4+
-- |
35
-- | A module to construct indentation aware parsers. Many programming
46
-- | language have indentation based syntax rules e.g. python and Haskell.
57
-- | This module exports combinators to create such parsers.

src/Text/Parsing/Parser/Language.purs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
-- | This module is a port of the Haskell
2+
-- | [__Text.Parsec.Language__](https://hackage.haskell.org/package/parsec/docs/Text-Parsec-Language.html)
3+
-- | module.
14
module Text.Parsing.Parser.Language
25
( haskellDef
36
, haskell
@@ -11,7 +14,8 @@ import Prelude
1114
import Control.Alt ((<|>))
1215
import Text.Parsing.Parser (ParserT)
1316
import Text.Parsing.Parser.String (char, oneOf)
14-
import Text.Parsing.Parser.Token (GenLanguageDef(..), LanguageDef, TokenParser, alphaNum, letter, makeTokenParser, unGenLanguageDef)
17+
import Text.Parsing.Parser.String.Basic (alphaNum, letter)
18+
import Text.Parsing.Parser.Token (GenLanguageDef(..), LanguageDef, TokenParser, makeTokenParser, unGenLanguageDef)
1519

1620
-----------------------------------------------------------
1721
-- Styles: haskellStyle, javaStyle
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
-- | Basic `String` parsers derived from primitive `String` parsers.
2+
-- |
3+
-- | Note: In the future, the
4+
-- | __noneOf__, __noneOfCodePoints__, __oneOf__, __oneOfCodePoints__, __skipSpaces__, __whiteSpace__
5+
-- | should be moved into this module and removed from the
6+
-- | __Text.Parsing.Parser.String__ module, because they are not primitive parsers.
7+
module Text.Parsing.Parser.String.Basic
8+
( digit
9+
, hexDigit
10+
, octDigit
11+
, letter
12+
, space
13+
, upper
14+
, alphaNum
15+
, intDecimal
16+
, number
17+
, module Text.Parsing.Parser.String
18+
) where
19+
20+
import Prelude
21+
22+
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper)
23+
import Data.Int as Data.Int
24+
import Data.Maybe (Maybe(..))
25+
import Data.Number (infinity, nan)
26+
import Data.Number as Data.Number
27+
import Data.String (CodePoint)
28+
import Data.String.CodePoints (codePointFromChar)
29+
import Data.Tuple (Tuple(..))
30+
import Text.Parsing.Parser (ParserT, fail)
31+
import Text.Parsing.Parser.Combinators (choice, skipMany, (<?>))
32+
import Text.Parsing.Parser.String (noneOf, noneOfCodePoints, oneOf, oneOfCodePoints, skipSpaces, whiteSpace)
33+
import Text.Parsing.Parser.String as Parser.String
34+
35+
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
36+
digit :: forall m. Monad m => ParserT String m Char
37+
digit = satisfyCP isDecDigit <?> "digit"
38+
39+
-- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`.
40+
hexDigit :: forall m. Monad m => ParserT String m Char
41+
hexDigit = satisfyCP isHexDigit <?> "hex digit"
42+
43+
-- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`.
44+
octDigit :: forall m. Monad m => ParserT String m Char
45+
octDigit = satisfyCP isOctDigit <?> "oct digit"
46+
47+
-- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`.
48+
upper :: forall m. Monad m => ParserT String m Char
49+
upper = satisfyCP isUpper <?> "uppercase letter"
50+
51+
-- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`.
52+
space :: forall m. Monad m => ParserT String m Char
53+
space = satisfyCP isSpace <?> "space"
54+
55+
-- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`.
56+
letter :: forall m. Monad m => ParserT String m Char
57+
letter = satisfyCP isAlpha <?> "letter"
58+
59+
-- | Parse an alphabetical or numerical character.
60+
-- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`.
61+
alphaNum :: forall m. Monad m => ParserT String m Char
62+
alphaNum = satisfyCP isAlphaNum <?> "letter or digit"
63+
64+
-- | Parser based on the __Data.Number.fromString__ function.
65+
-- |
66+
-- | This should be the inverse of `show :: String -> Number`.
67+
-- |
68+
-- | Examples of strings which can be parsed by this parser:
69+
-- | * `"3"`
70+
-- | * `"3.0"`
71+
-- | * `"0.3"`
72+
-- | * `"-0.3"`
73+
-- | * `"+0.3"`
74+
-- | * `"-3e-1"`
75+
-- | * `"-3.0E-1.0"`
76+
-- | * `"NaN"`
77+
-- | * `"-Infinity"`
78+
number :: forall m. Monad m => ParserT String m Number
79+
-- TODO because the JavaScript parseFloat function will successfully parse
80+
-- a Number up until it doesn't understand something and then return
81+
-- the partially parsed Number, this parser will sometimes consume more
82+
-- String that it actually parses. Example "1..3" will parse as 1.0.
83+
-- So this needs improvement.
84+
number =
85+
choice
86+
[ Parser.String.string "Infinity" *> pure infinity
87+
, Parser.String.string "+Infinity" *> pure infinity
88+
, Parser.String.string "-Infinity" *> pure (negate infinity)
89+
, Parser.String.string "NaN" *> pure nan
90+
, do
91+
Tuple section _ <- Parser.String.match do
92+
_ <- Parser.String.oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
93+
skipMany $ Parser.String.oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
94+
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
95+
case Data.Number.fromString section of
96+
Nothing -> fail $ "Could not parse Number " <> section
97+
Just x -> pure x
98+
]
99+
100+
-- | Parser based on the __Data.Int.fromString__ function.
101+
-- |
102+
-- | This should be the inverse of `show :: String -> Int`.
103+
-- |
104+
-- | Examples of strings which can be parsed by this parser:
105+
-- | * `"3"`
106+
-- | * `"-3"`
107+
-- | * `"+300"`
108+
intDecimal :: forall m. Monad m => ParserT String m Int
109+
intDecimal = do
110+
Tuple section _ <- Parser.String.match do
111+
_ <- Parser.String.oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
112+
skipMany $ Parser.String.oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
113+
case Data.Int.fromString section of
114+
Nothing -> fail $ "Could not parse Int " <> section
115+
Just x -> pure x
116+
117+
-- | Helper function
118+
satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char
119+
satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar)

src/Text/Parsing/Parser/Token.purs

Lines changed: 18 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
-- | Functions for working with streams of tokens.
2+
-- |
3+
-- | This module is a port of the Haskell
4+
-- | [__Text.Parsec.Token__](https://hackage.haskell.org/package/docs/Text-Parsec-Token.html)
5+
-- | module.
26

37
module Text.Parsing.Parser.Token
48
( token
@@ -11,14 +15,7 @@ module Text.Parsing.Parser.Token
1115
, TokenParser
1216
, GenTokenParser
1317
, makeTokenParser
14-
-- should these be exported? Maybe they should go in a different module?
15-
, digit
16-
, hexDigit
17-
, octDigit
18-
, upper
19-
, space
20-
, letter
21-
, alphaNum
18+
, module Text.Parsing.Parser.String.Basic
2219
) where
2320

2421
import Prelude hiding (between, when)
@@ -28,7 +25,7 @@ import Control.Monad.State (get, gets, modify_)
2825
import Control.MonadPlus (guard, (<|>))
2926
import Data.Array as Array
3027
import Data.Char (fromCharCode, toCharCode)
31-
import Data.CodePoint.Unicode (hexDigitToInt, isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper)
28+
import Data.CodePoint.Unicode (hexDigitToInt, isAlpha, isSpace)
3229
import Data.Either (Either(..))
3330
import Data.Foldable (foldl, foldr)
3431
import Data.Identity (Identity)
@@ -37,8 +34,7 @@ import Data.List (List(..))
3734
import Data.List as List
3835
import Data.List.NonEmpty (NonEmptyList)
3936
import Data.Maybe (Maybe(..), maybe)
40-
import Data.Number (fromString)
41-
import Data.String (CodePoint, null, toLower)
37+
import Data.String (null, toLower)
4238
import Data.String.CodePoints (codePointFromChar)
4339
import Data.String.CodeUnits (singleton, toChar) as CodeUnits
4440
import Data.String.CodeUnits as SCU
@@ -49,7 +45,8 @@ import Text.Parsing.Parser (ParseState(..), ParserT, consume, fail)
4945
import Text.Parsing.Parser.Combinators (between, choice, notFollowedBy, option, sepBy, sepBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>))
5046
import Text.Parsing.Parser.Pos (Position)
5147
import Text.Parsing.Parser.String (char, noneOf, oneOf, satisfy, satisfyCodePoint, string)
52-
import Text.Parsing.Parser.String as Parser.String
48+
import Text.Parsing.Parser.String.Basic as Basic
49+
import Text.Parsing.Parser.String.Basic (digit, hexDigit, octDigit, upper, space, letter, alphaNum)
5350

5451
-- | A parser which returns the first token in the stream.
5552
token :: forall m a. Monad m => (a -> Position) -> ParserT (List a) m a
@@ -477,7 +474,7 @@ makeTokenParser (LanguageDef languageDef) =
477474
escapeEmpty = char '&'
478475

479476
escapeGap :: ParserT String m Char
480-
escapeGap = Array.some space *> char '\\' <?> "end of string gap"
477+
escapeGap = Array.some Basic.space *> char '\\' <?> "end of string gap"
481478

482479
-- -- escape codes
483480
escapeCode :: ParserT String m Char
@@ -487,16 +484,16 @@ makeTokenParser (LanguageDef languageDef) =
487484
charControl :: ParserT String m Char
488485
charControl = do
489486
_ <- char '^'
490-
code <- upper
487+
code <- Basic.upper
491488
case fromCharCode (toCharCode code - toCharCode 'A' + 1) of
492489
Just c -> pure c
493490
Nothing -> fail "invalid character code (should not happen)"
494491

495492
charNum :: ParserT String m Char
496493
charNum = do
497494
code <- decimal
498-
<|> (char 'o' *> number 8 octDigit)
499-
<|> (char 'x' *> number 16 hexDigit)
495+
<|> (char 'o' *> number 8 Basic.octDigit)
496+
<|> (char 'x' *> number 16 Basic.hexDigit)
500497
if code > 0x10FFFF then fail "invalid escape sequence"
501498
else case fromCharCode code of
502499
Just c -> pure c
@@ -610,14 +607,8 @@ makeTokenParser (LanguageDef languageDef) =
610607
natural = lexeme nat <?> "natural"
611608

612609
-- floats
613-
614610
floating :: ParserT String m Number
615-
-- floating = decimal >>= fractExponent
616-
floating = do
617-
Tuple section _ <- Parser.String.match $ skipMany1 $ oneOf [ '-', '.', 'e', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
618-
case fromString section of
619-
Nothing -> fail $ "Could not parse Number " <> section
620-
Just x -> pure x
611+
floating = decimal >>= fractExponent
621612

622613
natFloat :: ParserT String m (Either Int Number)
623614
natFloat = char '0' *> zeroNumFloat
@@ -654,7 +645,7 @@ makeTokenParser (LanguageDef languageDef) =
654645
fraction :: ParserT String m Number
655646
fraction = "fraction" <??> do
656647
_ <- char '.'
657-
digits <- Array.some digit <?> "fraction"
648+
digits <- Array.some Basic.digit <?> "fraction"
658649
maybe (fail "not digit") pure $ foldr op (Just 0.0) digits
659650
where
660651
op :: Char -> Maybe Number -> Maybe Number
@@ -696,13 +687,13 @@ makeTokenParser (LanguageDef languageDef) =
696687
(hexadecimal <|> octal <|> decimal <|> pure 0) <?> ""
697688

698689
decimal :: ParserT String m Int
699-
decimal = number 10 digit
690+
decimal = number 10 Basic.digit
700691

701692
hexadecimal :: ParserT String m Int
702-
hexadecimal = oneOf [ 'x', 'X' ] *> number 16 hexDigit
693+
hexadecimal = oneOf [ 'x', 'X' ] *> number 16 Basic.hexDigit
703694

704695
octal :: ParserT String m Int
705-
octal = oneOf [ 'o', 'O' ] *> number 8 octDigit
696+
octal = oneOf [ 'o', 'O' ] *> number 8 Basic.octDigit
706697

707698
number :: Int -> ParserT String m Char -> ParserT String m Int
708699
number base baseDigit = do
@@ -886,38 +877,3 @@ inCommentSingle (LanguageDef languageDef) =
886877
startEnd :: Array Char
887878
startEnd = SCU.toCharArray languageDef.commentEnd <> SCU.toCharArray languageDef.commentStart
888879

889-
-------------------------------------------------------------------------
890-
-- Helper functions that should maybe go in Text.Parsing.Parser.String --
891-
-------------------------------------------------------------------------
892-
893-
satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char
894-
satisfyCP p = satisfy (p <<< codePointFromChar)
895-
896-
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
897-
digit :: forall m. Monad m => ParserT String m Char
898-
digit = satisfyCP isDecDigit <?> "digit"
899-
900-
-- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`.
901-
hexDigit :: forall m. Monad m => ParserT String m Char
902-
hexDigit = satisfyCP isHexDigit <?> "hex digit"
903-
904-
-- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`.
905-
octDigit :: forall m. Monad m => ParserT String m Char
906-
octDigit = satisfyCP isOctDigit <?> "oct digit"
907-
908-
-- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`.
909-
upper :: forall m. Monad m => ParserT String m Char
910-
upper = satisfyCP isUpper <?> "uppercase letter"
911-
912-
-- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`.
913-
space :: forall m. Monad m => ParserT String m Char
914-
space = satisfyCP isSpace <?> "space"
915-
916-
-- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`.
917-
letter :: forall m. Monad m => ParserT String m Char
918-
letter = satisfyCP isAlpha <?> "letter"
919-
920-
-- | Parse an alphabetical or numerical character.
921-
-- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`.
922-
alphaNum :: forall m. Monad m => ParserT String m Char
923-
alphaNum = satisfyCP isAlphaNum <?> "letter or digit"

0 commit comments

Comments
 (0)