Skip to content

Commit

Permalink
Version 0.1.3.7. Generic fasta parser (#41)
Browse files Browse the repository at this point in the history
  • Loading branch information
vks4git authored Oct 15, 2020
1 parent e53b634 commit b586829
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 17 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

## [0.1.3.7] - 2020-10-14
### Added
- Generic fasta parser.

## [0.1.3.6] - 2020-07-14
### Added
- Conversion from `Model`s to `PDB`.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.3.6
version: 0.1.3.7
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
1 change: 1 addition & 0 deletions src/Bio/FASTA.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module Bio.FASTA
, fromFile
, toFile
, fastaP
, fastaPGeneric
) where

import Bio.FASTA.Parser
Expand Down
34 changes: 19 additions & 15 deletions src/Bio/FASTA/Parser.hs
Original file line number Diff line number Diff line change
@@ -1,31 +1,35 @@
module Bio.FASTA.Parser
( fastaP
, fastaPGeneric
) where

import Bio.FASTA.Type (Fasta, FastaItem (..))
import Bio.Sequence (BareSequence, bareSequence)
import Data.Attoparsec.Text (Parser, char, choice, endOfInput,
endOfLine, letter, many', many1',
takeWhile)
import Data.Text (Text, strip)
import Prelude hiding (takeWhile)
import Bio.FASTA.Type (Fasta, FastaItem (..))
import Bio.Sequence (BareSequence, bareSequence)
import Data.Attoparsec.Text (Parser, char, choice, endOfInput, endOfLine, many', many1', satisfy,
takeWhile)
import Data.Char (isLetter)
import Data.Text (Text, strip)
import Prelude hiding (takeWhile)

-- | Parser of .fasta file.
-- Defined as 'fastaPGeneric' applied to 'isLetter', i.e. sequence characters
-- are the ones accepted by 'isLetter'.
--
fastaP :: Parser (Fasta Char)
fastaP = many' item
fastaP = fastaPGeneric isLetter

item :: Parser (FastaItem Char)
item = FastaItem <$> seqName <*> fastaSeq
-- | Generic parser of .fasta file.
-- The given predicate decides which characters are accepted as elements
-- of a sequence; zero or more records are parsed.
--
fastaPGeneric :: (Char -> Bool) -> Parser (Fasta Char)
fastaPGeneric predicate = many' (item predicate)

seqName :: Parser (Text)
-- | Parser of a single record: the name line first, then the sequence whose
-- characters must satisfy the given predicate.
--
item :: (Char -> Bool) -> Parser (FastaItem Char)
item predicate = do
  name <- seqName
  body <- fastaSeq predicate
  pure (FastaItem name body)

-- | Parser of the name line: a leading @>@, then everything up to the first
-- @\\n@ or @\\r@, then the end of the line. The name is returned with
-- surrounding whitespace removed by 'strip'.
--
seqName :: Parser Text
seqName = fmap strip (char '>' *> tabs *> takeWhile isNameChar <* tabs <* eol)
  where
    -- Any character except the two line-break characters belongs to the name.
    isNameChar c = c /= '\n' && c /= '\r'

fastaSeq :: Parser (BareSequence Char)
fastaSeq = bareSequence . mconcat <$> many' line
-- | Parser of a sequence: zero or more lines of characters satisfying the
-- predicate. The lines are concatenated and passed to 'bareSequence'.
--
fastaSeq :: (Char -> Bool) -> Parser (BareSequence Char)
fastaSeq predicate = fmap (bareSequence . mconcat) (many' (line predicate))

line :: Parser String
line = concat <$> many1' (many1' letter <* many' (char ' ')) <* eol
-- | Parser of one sequence line: one or more chunks followed by 'eol'.
-- The chunks are concatenated, so the spaces between them are not part of
-- the result.
--
line :: (Char -> Bool) -> Parser String
line predicate = concat <$> many1' chunk <* eol
  where
    -- A non-empty run of accepted characters with optional trailing spaces.
    chunk = many1' (satisfy predicate) <* many' (char ' ')

-- | End of a line: runs @tabs@, then accepts either @slashN@ or the end of
-- input.
-- NOTE(review): @tabs@ and @slashN@ are defined outside the visible part of
-- the file; presumably they skip tabs and match a line break — confirm.
--
eol :: Parser ()
eol = do
  _ <- tabs
  choice [slashN, endOfInput]
Expand Down
7 changes: 6 additions & 1 deletion src/Bio/Sequence/Class.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ module Bio.Sequence.Class
, sequ
, markings
, weights
, bareSequ

-- classes for weights and markings of sequence
, IsMarking
Expand Down Expand Up @@ -87,7 +88,7 @@ instance Foldable (Sequence mk w) where
length = V.length . _sequ

-- | Only the elements stored in '_sequ' are traversed; the traversed vector
-- is put back with a record update, so the other fields stay as they are.
instance Traversable (Sequence mk w) where
traverse f s@Sequence{..} = fmap (\newSeq -> s { _sequ = newSeq }) $ traverse f _sequ
traverse f s@Sequence{..} = (\newSeq -> s { _sequ = newSeq }) <$> traverse f _sequ

-- | Exported constructor for 'Sequence'. Should be used ONLY in module Bio.Sequence.
--
Expand All @@ -113,6 +114,7 @@ type BareSequence a = Sequence () () a
--------------------------------------------------------------------------------
-- Lenses for 'Sequence'.
-- We create only getters, so that the user can't ruin 'Sequence's invariant.
-- But we can create a Lens for 'BareSequence', and it won't ruin any invariants.
--------------------------------------------------------------------------------

sequ :: Getter (Sequence mk w a) (Vector a)
Expand All @@ -124,6 +126,9 @@ markings = to _markings
-- | Read-only access to the vector of weights of a 'Sequence'.
--
weights :: Getter (Sequence mk w a) (Vector w)
weights = to _weights

-- | Lens to the vector of elements of a 'BareSequence': reads '_sequ' and
-- replaces it on update, leaving the other fields as they are.
--
bareSequ :: Lens' (BareSequence a) (Vector a)
bareSequ = lens _sequ replaceSequ
  where
    -- Setter half of the lens: record update of the '_sequ' field only.
    replaceSequ bare newSequ = bare { _sequ = newSequ }


--------------------------------------------------------------------------------
-- IsMarking class.
Expand Down

0 comments on commit b586829

Please sign in to comment.