Skip to content

Commit

Permalink
Version 0.1.3.6. Added PDB writer (#40)
Browse files Browse the repository at this point in the history
* added writer for PDB

* more

* Version 0.1.3.6. Added PDB writer

* fixes

* flip
  • Loading branch information
AlexKaneRUS authored Jul 14, 2020
1 parent 2d67bf7 commit e53b634
Show file tree
Hide file tree
Showing 11 changed files with 13,316 additions and 22 deletions.
6 changes: 6 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## [Unreleased]

## [0.1.3.6] - 2020-07-14
### Added
- Convertation from `Model`s to `PDB`.
- Writer for `PDB`.
- `renameChains` function that renames chains in a model.

## [0.1.3.5] - 2020-05-26
### Fixed
- Correctly clean `BasecalledSequenceWithRawData`, including inner quality.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.3.5
version: 0.1.3.6
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
79 changes: 63 additions & 16 deletions src/Bio/PDB.hs
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
( modelsFromPDBText
, modelsFromPDBFile
( modelsFromPDBText, modelsToPDBText
, modelsFromPDBFile, modelsToPDBFile
) where

import qualified Bio.PDB.Type as PDB
import Bio.PDB.Reader (fromTextPDB, PDBWarnings)
import Bio.PDB.BondRestoring (restoreModelGlobalBonds, restoreChainLocalBonds, residueID)
import Bio.PDB.BondRestoring (residueID, restoreChainLocalBonds,
restoreModelGlobalBonds)
import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type as PDB
import Bio.PDB.Writer (pdbToFile, pdbToText)
import Bio.Structure

import Control.Arrow ((&&&))
import Control.Lens ((^.))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)

import Data.Text as T (Text, singleton, unpack, strip)
import Data.Text.IO as TIO (readFile)
import Data.Map (Map)
import qualified Data.Map as M ((!), fromList)
import qualified Data.Vector as V
import Data.List (sort)
import Data.Map (Map)
import qualified Data.Map as M (fromList, (!))
import Data.Maybe (fromMaybe)

import Data.Text (Text)
import qualified Data.Text as T (head, pack, singleton, strip,
unpack)
import Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V
import Linear.V3 (V3 (..), _x, _y, _z)
import Text.Read (readMaybe)

import Linear.V3 (V3 (..))

instance StructureModels PDB.PDB where
modelsOf PDB.PDB {..} = fmap mkModel models
where
Expand All @@ -49,7 +52,7 @@ instance StructureModels PDB.PDB where
safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
safeFirstAtom arr | V.length arr > 0 = arr V.! 0
| otherwise = error "Could not pick first atom"

mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue _ [] = error "Cound not make residue from empty list"
mkResidue localBondsMap atoms' = Residue (T.strip $ PDB.atomResName firstResidueAtom)
Expand Down Expand Up @@ -80,3 +83,47 @@ modelsFromPDBText pdbText = do
(warnings, parsedPDB) <- fromTextPDB pdbText
let models = modelsOf parsedPDB
pure (warnings, models)

instance StructureSerializable PDB.PDB where
serializeModels models = PDB.PDB "Serialized model" pdbModels mempty mempty
where
pdbModels = fmap toPDBModel models

toPDBModel :: Model -> PDB.Model
toPDBModel = fmap toPDBChain . modelChains

toPDBChain :: Chain -> PDB.Chain
toPDBChain ch = fmap toPDBAtom . join $ (\r -> fmap ((,,) ch r) $ resAtoms r) <$> chainResidues ch

toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
toPDBAtom (Chain{..}, Residue{..}, Atom{..}) = res
where
res =
PDB.Atom
(getGlobalID atomId + 1)
atomName
nullAltLoc
resName
(T.head chainName)
resNumber
resInsertionCode
(atomCoords ^. _x)
(atomCoords ^. _y)
(atomCoords ^. _z)
occupancy
bFactor
atomElement
(T.pack $ show formalCharge)

nullAltLoc :: Char
nullAltLoc = ' '

-- | Writes models to the given path as PDB.
--
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile models = pdbToFile $ serializeModels models

-- | Converts models to their 'Text' representation as PDB.
--
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = pdbToText . serializeModels
5 changes: 3 additions & 2 deletions src/Bio/PDB/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ atomP :: Parser CoordLike
atomP = let atom = Atom <$>
(
(string "ATOM " *> -- (1 - 5) ATOM -- we extended atomSerial length to the left for one symbol
(read <$> count 6 notEndLineChar) <* space) -- (6 - 11) atomSerial
(read <$> count 6 notEndLineChar)) -- (6 - 11) atomSerial
<|> -- or
(string "HETATM" *> -- (1 - 6) HETATM
(read <$> count 5 notEndLineChar) <* space) -- (7 - 11) atomSerial
(read <$> count 5 notEndLineChar)) -- (7 - 11) atomSerial
)
<* space
<*> (T.pack <$> count 4 notEndLineChar) -- (13 - 16) atomName
<*> notEndLineChar -- (17) atomAltLoc
<*> (T.pack <$> count 3 notEndLineChar) <* space -- (18 - 20) atomResName
Expand Down
155 changes: 155 additions & 0 deletions src/Bio/PDB/Writer.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Bio.PDB.Writer
( pdbToFile
, pdbToText
) where


import Bio.PDB.Type (Atom (..), Chain, Model, PDB (..))
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Text (Text)
import qualified Data.Text as T (cons, drop, init, intercalate,
last, length, pack, replicate,
singleton, splitAt, take)
import qualified Data.Text.IO as TIO (writeFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (all, foldl', fromList, last,
length, toList, zip)
import Text.Printf (printf)

-- | Writes 'PDB' to the given path.
--
pdbToFile :: MonadIO m => PDB -> FilePath -> m ()
pdbToFile pdb = liftIO . flip TIO.writeFile (pdbToText pdb)

-- | Converts 'PDB' to 'Text'.
--
pdbToText :: PDB -> Text
pdbToText PDB{..} = (<> newLine <> toPDBLine end)
$ T.intercalate newLine . V.toList . fmap (modelToText separateModels)
$ V.zip models $ V.fromList [1 ..]
where
separateModels = V.length models > 1

end :: Text
end = "END "

type TerAtom = Atom

modelToText :: Bool -> (Model, Int) -> Text
modelToText separateModels (pdbModel, modelInd) = modelPrefix <> atomsT <> modelSuffix
where
atomsT = T.intercalate newLine . V.toList . fmap atomOrTer . withTers $ pdbModel

modelPrefix | separateModels = toPDBLine (mdl <> spaceText 4 <> prependToS 4 modelInd) <> newLine
| otherwise = ""

modelSuffix | separateModels = newLine <> toPDBLine endmdl
| otherwise = ""

mdl :: Text
mdl = "MODEL "

endmdl :: Text
endmdl = "ENDMDL "

withTers :: Vector Chain -> Vector (Either Atom TerAtom)
withTers = snd . V.foldl' addTer (0, mempty)
where
addTer :: (Int, Vector (Either Atom TerAtom)) -> Chain -> (Int, Vector (Either Atom TerAtom))
addTer (add, res) cur = if V.all (isHetatm . atomResName) cur then (add, newRes) else withTer
where
ter = addSerial (add + 1) $ V.last cur
newRes = res <> fmap (Left . addSerial add) cur

withTer = (add + 1, newRes <> pure (Right ter))

addSerial :: Int -> Atom -> Atom
addSerial i at@Atom{..} = at { atomSerial = atomSerial + i }

atomOrTer :: Either Atom TerAtom -> Text
atomOrTer = either atomToText terAtomToText

terAtomToText :: Atom -> Text
terAtomToText at = toPDBLine $ pref <> spaceText 6 <> suf
where
t = (ter <>) . T.take 21 . T.drop 6 $ atomToText at
(pref, suf) = T.drop 6 <$> T.splitAt 11 t

ter :: Text
ter = "TER "

atomToText :: Atom -> Text
atomToText Atom{..} = res
where
recordName | isHetatm atomResName = hetatm
| otherwise = atm

serial = prependToS 5 atomSerial
name = (\t -> if T.last t == space then T.cons space $ T.init t else t) $ appendTo 4 atomName
altLoc = T.singleton atomAltLoc
resName = prependTo 3 atomResName
chainID = T.singleton atomChainID
resSeq = prependToS 4 atomResSeq
iCode = T.singleton atomICode
x = prependTo 8 $ printFloat 3 atomX
y = prependTo 8 $ printFloat 3 atomY
z = prependTo 8 $ printFloat 3 atomZ
occupancy = prependTo 6 $ printFloat 2 atomOccupancy
tempFactor = prependTo 6 $ printFloat 2 atomTempFactor
element = prependTo 2 atomElement

charge | atomCharge /= zeroCharge = prependTo 2 atomCharge
| otherwise = spaceText 2

res = recordName <> serial <> spaceText 1 <> name <> altLoc
<> resName <> spaceText 1 <> chainID <> resSeq <> iCode <> spaceText 3
<> x <> y <> z <> occupancy <> tempFactor <> spaceText 10
<> element <> charge

atm :: Text
atm = "ATOM "

hetatm :: Text
hetatm = "HETATM"

zeroCharge :: Text
zeroCharge = "0"

printFloat :: Int -> Float -> Text
printFloat after f = T.pack $ printf "%.*f" after f

--------------------------------------------------------------------------------
-- Utility functions.
--------------------------------------------------------------------------------

isHetatm :: Text -> Bool
isHetatm = (`notElem` canonicalAminoAcids)
where
canonicalAminoAcids = [ "ACE", "ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN"
, "GLY", "HIS", "HID", "HIE", "HIP", "ILE", "LEU", "LYS", "LYN"
, "MET", "NMA", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"
]

toPDBLine :: Text -> Text
toPDBLine = appendTo 80

prependToS :: Show a => Int -> a -> Text
prependToS i = prependTo i . T.pack . show

prependTo :: Int -> Text -> Text
prependTo i t = spaceText (i - T.length t) <> t

appendTo :: Int -> Text -> Text
appendTo i t = t <> spaceText (i - T.length t)

newLine :: Text
newLine = "\n"

spaceText :: Int -> Text
spaceText = flip T.replicate " "

space :: Char
space = ' '
16 changes: 14 additions & 2 deletions src/Bio/Structure/Functions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@ module Bio.Structure.Functions
, chain, globalBond
, residue
, atom, localBond
, renameChains
) where

import Bio.Structure (Atom (..), Bond (..), Chain (..),
GlobalID (..), LocalID (..), Model (..),
Residue (..), atoms, chains, globalBonds,
localBonds, residues)
import Control.Lens (Traversal', each)
import qualified Data.Map.Strict as M (fromList, (!))
import Control.Lens (Traversal', each, (%~), (&))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, (!), (!?))
import Data.Set (Set)
import qualified Data.Set as S (fromList, notMember, unions)
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V (filter, fromList, length, toList, unzip)

Expand Down Expand Up @@ -41,6 +44,15 @@ atom = atoms . each
localBond :: Traversal' Residue (Bond LocalID)
localBond = localBonds . each

-- | Rename chains of a given model according to the given mapping.
-- If chain is not present in the mapping then its name won't be changed.
--
renameChains :: Model -> Map Text Text -> Model
renameChains model mapping = model & chain %~ renameChain
where
renameChain :: Chain -> Chain
renameChain ch@Chain{..} = ch { chainName = maybe chainName id $ mapping M.!? chainName }

-- | Takes predicate on 'Atom's of 'Model' and returns new 'Model' containing only atoms
-- satisfying given predicate.
--
Expand Down
Loading

0 comments on commit e53b634

Please sign in to comment.