Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 0.1.3.6. Added PDB writer #40

Merged
merged 5 commits into from
Jul 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## [Unreleased]

## [0.1.3.6] - 2020-07-14
### Added
- Convertation from `Model`s to `PDB`.
- Writer for `PDB`.
- `renameChains` function that renames chains in a model.

## [0.1.3.5] - 2020-05-26
### Fixed
- Correctly clean `BasecalledSequenceWithRawData`, including inner quality.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.3.5
version: 0.1.3.6
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
79 changes: 63 additions & 16 deletions src/Bio/PDB.hs
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
( modelsFromPDBText
, modelsFromPDBFile
( modelsFromPDBText, modelsToPDBText
, modelsFromPDBFile, modelsToPDBFile
) where

import qualified Bio.PDB.Type as PDB
import Bio.PDB.Reader (fromTextPDB, PDBWarnings)
import Bio.PDB.BondRestoring (restoreModelGlobalBonds, restoreChainLocalBonds, residueID)
import Bio.PDB.BondRestoring (residueID, restoreChainLocalBonds,
restoreModelGlobalBonds)
import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type as PDB
import Bio.PDB.Writer (pdbToFile, pdbToText)
import Bio.Structure

import Control.Arrow ((&&&))
import Control.Lens ((^.))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)

import Data.Text as T (Text, singleton, unpack, strip)
import Data.Text.IO as TIO (readFile)
import Data.Map (Map)
import qualified Data.Map as M ((!), fromList)
import qualified Data.Vector as V
import Data.List (sort)
import Data.Map (Map)
import qualified Data.Map as M (fromList, (!))
import Data.Maybe (fromMaybe)

import Data.Text (Text)
import qualified Data.Text as T (head, pack, singleton, strip,
unpack)
import Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V
import Linear.V3 (V3 (..), _x, _y, _z)
import Text.Read (readMaybe)

import Linear.V3 (V3 (..))

instance StructureModels PDB.PDB where
modelsOf PDB.PDB {..} = fmap mkModel models
where
Expand All @@ -49,7 +52,7 @@ instance StructureModels PDB.PDB where
safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
safeFirstAtom arr | V.length arr > 0 = arr V.! 0
| otherwise = error "Could not pick first atom"

mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue _ [] = error "Cound not make residue from empty list"
mkResidue localBondsMap atoms' = Residue (T.strip $ PDB.atomResName firstResidueAtom)
Expand Down Expand Up @@ -80,3 +83,47 @@ modelsFromPDBText pdbText = do
(warnings, parsedPDB) <- fromTextPDB pdbText
let models = modelsOf parsedPDB
pure (warnings, models)

instance StructureSerializable PDB.PDB where
serializeModels models = PDB.PDB "Serialized model" pdbModels mempty mempty
where
pdbModels = fmap toPDBModel models

toPDBModel :: Model -> PDB.Model
toPDBModel = fmap toPDBChain . modelChains

toPDBChain :: Chain -> PDB.Chain
toPDBChain ch = fmap toPDBAtom . join $ (\r -> fmap ((,,) ch r) $ resAtoms r) <$> chainResidues ch

toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
toPDBAtom (Chain{..}, Residue{..}, Atom{..}) = res
where
res =
PDB.Atom
(getGlobalID atomId + 1)
atomName
nullAltLoc
resName
(T.head chainName)
resNumber
resInsertionCode
(atomCoords ^. _x)
(atomCoords ^. _y)
(atomCoords ^. _z)
occupancy
bFactor
atomElement
(T.pack $ show formalCharge)

nullAltLoc :: Char
nullAltLoc = ' '

-- | Writes models to the given path as PDB.
--
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile models = pdbToFile $ serializeModels models

-- | Converts models to their 'Text' representation as PDB.
--
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = pdbToText . serializeModels
5 changes: 3 additions & 2 deletions src/Bio/PDB/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ atomP :: Parser CoordLike
atomP = let atom = Atom <$>
(
(string "ATOM " *> -- (1 - 5) ATOM -- we extended atomSerial length to the left for one symbol
(read <$> count 6 notEndLineChar) <* space) -- (6 - 11) atomSerial
(read <$> count 6 notEndLineChar)) -- (6 - 11) atomSerial
<|> -- or
(string "HETATM" *> -- (1 - 6) HETATM
(read <$> count 5 notEndLineChar) <* space) -- (7 - 11) atomSerial
(read <$> count 5 notEndLineChar)) -- (7 - 11) atomSerial
)
<* space
AlexKaneRUS marked this conversation as resolved.
Show resolved Hide resolved
<*> (T.pack <$> count 4 notEndLineChar) -- (13 - 16) atomName
<*> notEndLineChar -- (17) atomAltLoc
<*> (T.pack <$> count 3 notEndLineChar) <* space -- (18 - 20) atomResName
Expand Down
155 changes: 155 additions & 0 deletions src/Bio/PDB/Writer.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Bio.PDB.Writer
( pdbToFile
, pdbToText
) where


import Bio.PDB.Type (Atom (..), Chain, Model, PDB (..))
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Text (Text)
import qualified Data.Text as T (cons, drop, init, intercalate,
last, length, pack, replicate,
singleton, splitAt, take)
import qualified Data.Text.IO as TIO (writeFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (all, foldl', fromList, last,
length, toList, zip)
import Text.Printf (printf)

-- | Writes 'PDB' to the given path.
--
pdbToFile :: MonadIO m => PDB -> FilePath -> m ()
pdbToFile pdb = liftIO . flip TIO.writeFile (pdbToText pdb)

-- | Converts 'PDB' to 'Text'.
--
pdbToText :: PDB -> Text
pdbToText PDB{..} = (<> newLine <> toPDBLine end)
$ T.intercalate newLine . V.toList . fmap (modelToText separateModels)
$ V.zip models $ V.fromList [1 ..]
where
separateModels = V.length models > 1

end :: Text
end = "END "

type TerAtom = Atom

modelToText :: Bool -> (Model, Int) -> Text
modelToText separateModels (pdbModel, modelInd) = modelPrefix <> atomsT <> modelSuffix
where
atomsT = T.intercalate newLine . V.toList . fmap atomOrTer . withTers $ pdbModel

modelPrefix | separateModels = toPDBLine (mdl <> spaceText 4 <> prependToS 4 modelInd) <> newLine
| otherwise = ""

modelSuffix | separateModels = newLine <> toPDBLine endmdl
| otherwise = ""

mdl :: Text
mdl = "MODEL "

endmdl :: Text
endmdl = "ENDMDL "

withTers :: Vector Chain -> Vector (Either Atom TerAtom)
withTers = snd . V.foldl' addTer (0, mempty)
where
addTer :: (Int, Vector (Either Atom TerAtom)) -> Chain -> (Int, Vector (Either Atom TerAtom))
addTer (add, res) cur = if V.all (isHetatm . atomResName) cur then (add, newRes) else withTer
where
ter = addSerial (add + 1) $ V.last cur
newRes = res <> fmap (Left . addSerial add) cur

withTer = (add + 1, newRes <> pure (Right ter))

addSerial :: Int -> Atom -> Atom
addSerial i at@Atom{..} = at { atomSerial = atomSerial + i }

atomOrTer :: Either Atom TerAtom -> Text
atomOrTer = either atomToText terAtomToText

terAtomToText :: Atom -> Text
terAtomToText at = toPDBLine $ pref <> spaceText 6 <> suf
where
t = (ter <>) . T.take 21 . T.drop 6 $ atomToText at
(pref, suf) = T.drop 6 <$> T.splitAt 11 t

ter :: Text
ter = "TER "

atomToText :: Atom -> Text
atomToText Atom{..} = res
where
recordName | isHetatm atomResName = hetatm
| otherwise = atm

serial = prependToS 5 atomSerial
name = (\t -> if T.last t == space then T.cons space $ T.init t else t) $ appendTo 4 atomName
altLoc = T.singleton atomAltLoc
resName = prependTo 3 atomResName
chainID = T.singleton atomChainID
resSeq = prependToS 4 atomResSeq
iCode = T.singleton atomICode
x = prependTo 8 $ printFloat 3 atomX
y = prependTo 8 $ printFloat 3 atomY
z = prependTo 8 $ printFloat 3 atomZ
occupancy = prependTo 6 $ printFloat 2 atomOccupancy
tempFactor = prependTo 6 $ printFloat 2 atomTempFactor
element = prependTo 2 atomElement

charge | atomCharge /= zeroCharge = prependTo 2 atomCharge
| otherwise = spaceText 2

res = recordName <> serial <> spaceText 1 <> name <> altLoc
<> resName <> spaceText 1 <> chainID <> resSeq <> iCode <> spaceText 3
<> x <> y <> z <> occupancy <> tempFactor <> spaceText 10
<> element <> charge

atm :: Text
atm = "ATOM "

hetatm :: Text
hetatm = "HETATM"

zeroCharge :: Text
zeroCharge = "0"

printFloat :: Int -> Float -> Text
printFloat after f = T.pack $ printf "%.*f" after f

--------------------------------------------------------------------------------
-- Utility functions.
--------------------------------------------------------------------------------

isHetatm :: Text -> Bool
isHetatm = (`notElem` canonicalAminoAcids)
where
canonicalAminoAcids = [ "ACE", "ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN"
, "GLY", "HIS", "HID", "HIE", "HIP", "ILE", "LEU", "LYS", "LYN"
, "MET", "NMA", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"
]

toPDBLine :: Text -> Text
toPDBLine = appendTo 80

prependToS :: Show a => Int -> a -> Text
prependToS i = prependTo i . T.pack . show

prependTo :: Int -> Text -> Text
prependTo i t = spaceText (i - T.length t) <> t

appendTo :: Int -> Text -> Text
appendTo i t = t <> spaceText (i - T.length t)

newLine :: Text
newLine = "\n"

spaceText :: Int -> Text
spaceText = flip T.replicate " "

space :: Char
space = ' '
16 changes: 14 additions & 2 deletions src/Bio/Structure/Functions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@ module Bio.Structure.Functions
, chain, globalBond
, residue
, atom, localBond
, renameChains
) where

import Bio.Structure (Atom (..), Bond (..), Chain (..),
GlobalID (..), LocalID (..), Model (..),
Residue (..), atoms, chains, globalBonds,
localBonds, residues)
import Control.Lens (Traversal', each)
import qualified Data.Map.Strict as M (fromList, (!))
import Control.Lens (Traversal', each, (%~), (&))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, (!), (!?))
import Data.Set (Set)
import qualified Data.Set as S (fromList, notMember, unions)
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V (filter, fromList, length, toList, unzip)

Expand Down Expand Up @@ -41,6 +44,15 @@ atom = atoms . each
localBond :: Traversal' Residue (Bond LocalID)
localBond = localBonds . each

-- | Rename chains of a given model according to the given mapping.
-- If chain is not present in the mapping then its name won't be changed.
--
renameChains :: Model -> Map Text Text -> Model
renameChains model mapping = model & chain %~ renameChain
where
renameChain :: Chain -> Chain
renameChain ch@Chain{..} = ch { chainName = maybe chainName id $ mapping M.!? chainName }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Можно так: mapping ^? ix chainName . non chainName

А для самого chainName линзы нет? Чтобы написать

model & chain . chainName %~ \n -> mapping ^? ix n . non n

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Вот, к сожалению, тут не нашлось линзы, да...


-- | Takes predicate on 'Atom's of 'Model' and returns new 'Model' containing only atoms
-- satisfying given predicate.
--
Expand Down
Loading