Skip to content

Commit

Permalink
Version 0.1.4.2. Multiline props are supported. (#63)
Browse files Browse the repository at this point in the history
  • Loading branch information
vks4git authored Oct 14, 2021
1 parent c44bde5 commit f7a953d
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 16 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

## [0.1.4.2] - 2021-10-14
### Changed
- More types of multiline properties are supported.

## [0.1.4.1] - 2021-10-07
### Changed
- CI fix
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.4.1
version: 0.1.4.2
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
46 changes: 32 additions & 14 deletions src/Bio/GB/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@ module Bio.GB.Parser
, rangeP
) where

import Bio.GB.Type (Feature (..), Form (..), GenBankSequence (..), Locus (..),
Meta (..), Parser, Reference (..), Source (..), Version (..))
import Bio.Sequence (Border (..), MarkedSequence, Range (..), RangeBorder (..),
markedSequence, shiftRange)
import Control.Monad.Combinators (many, manyTill, optional, some, (<|>))
import Data.Char (isAlphaNum, isSpace, isUpper)
import Data.Functor (($>))
import Data.Text (Text, intercalate, pack, splitOn, unpack)
import Text.Megaparsec (option, satisfy, sepBy1, takeWhile1P, takeWhileP, try, (<?>))
import Text.Megaparsec.Char (char, digitChar, eol, letterChar, string)
import Text.Megaparsec.Char.Lexer (decimal)
import Bio.GB.Type (Feature (..), Form (..), GenBankSequence (..),
Locus (..), Meta (..), Parser, Reference (..),
Source (..), Version (..))
import Bio.Sequence (Border (..), MarkedSequence, Range (..),
RangeBorder (..), markedSequence, shiftRange)
import Control.Monad.Combinators (many, manyTill, optional, some, (<|>))
import Data.Char (isAlphaNum, isSpace, isUpper)
import Data.Functor (($>))
import Data.Text (Text, intercalate, pack, splitOn, unpack)
import qualified Data.Text as T
import Text.Megaparsec (notFollowedBy, option, satisfy, sepBy1, takeWhile1P,
takeWhileP, try, (<?>))
import Text.Megaparsec.Char (char, digitChar, eol, letterChar, string)
import Text.Megaparsec.Char.Lexer (decimal)

-- | Parser of .gb file.
--
Expand Down Expand Up @@ -164,13 +167,28 @@ propsP = do
propName <- takeWhile1P Nothing (/= '=')
_ <- char '='

propText <- try ((char '\"' *> takeWhile1P Nothing (/= '\"') <* char '\"')
<|> textWithSpacesP)
<* eolSpaceP
propText <- try ((char '\"' *> takeWhile1P Nothing (/= '\"') <* char '\"' <* eolSpaceP)
<|> multiLineProp)

let propTextCorrect = mconcat $ filter (/= featureIndent2) $ splitOn featureIndent2 propText

pure (propName, propTextCorrect)
where
-- Parses one continuation line of a property value. The line must start
-- with the featureIndent2 prefix and must not continue with a '/' character.
-- Returns the text read by textWithSpacesP; the trailing eolSpaceP is run
-- only for its effect and its result is discarded.
indLine :: Parser Text
indLine = do
_ <- string featureIndent2
-- NOTE(review): '/' presumably marks the start of the next property
-- (cf. "/label=", "/note=" in the test fixture), so such a line is
-- rejected here rather than being absorbed into the current value -- confirm.
notFollowedBy (char '/')
text <- textWithSpacesP
eolSpaceP
pure text

-- Parses a property value that may span several lines: a first line read by
-- textWithSpacesP (followed by eolSpaceP), then zero or more continuation
-- lines accepted by indLine. The pieces are joined with T.concat, which adds
-- no separator of its own between them.
multiLineProp :: Parser Text
multiLineProp = do
fstText <- textWithSpacesP <* eolSpaceP
-- indLine consumes the indent before its notFollowedBy check can fail;
-- try rewinds that input so `many` can stop instead of failing.
rest <- many (try indLine)
pure $ T.concat (fstText : rest)



-- | First level of indentation in FEATURES table file.
--
Expand Down
1 change: 1 addition & 0 deletions test/GB/fromYanaWithLove.gb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ FEATURES Location/Qualifiers
/mol_type="other DNA"
rep_origin join(1,6551..6951)
/label=pUCorigin
and also a multiline property
/note="/vntifkey=33"
enhancer 449..858
/label=cmv enhanser
Expand Down
2 changes: 1 addition & 1 deletion test/GBParserSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ sophisticatedFeatures =
, preciseSpan (0, 6950))

, (Feature "rep_origin"
[ ("label", "pUCorigin")
[ ("label", "pUCorigin and also a multiline property")
, ("note", "/vntifkey=33")
]
, Join [Point 0, preciseSpan (6550, 6950)])
Expand Down

0 comments on commit f7a953d

Please sign in to comment.