Skip to content

Commit

Permalink
Added prefix to canned methods.
Browse files Browse the repository at this point in the history
Fixes #11
  • Loading branch information
sverhoeven committed Feb 11, 2016
1 parent e967ddb commit 955b4bf
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
17 changes: 12 additions & 5 deletions kripodb/canned.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,15 @@ def similarities(queries, distance_matrix_filename, cutoff, limit=1000):
return pd.DataFrame(hits)


def fragments_by_pdb_codes(pdb_codes, fragments_db_filename):
def fragments_by_pdb_codes(pdb_codes, fragments_db_filename, prefix=''):
"""Retrieve fragments based on PDB codes.
See http://www.rcsb.org/pdb/ for PDB structures.
Args:
pdb_codes (List[str]): List of PDB codes
fragments_db_filename (str): Filename of fragments db
prefix (str): Prefix for output columns
Examples:
Fetch fragments of '2n2k' PDB code
Expand All @@ -86,18 +87,22 @@ def fragments_by_pdb_codes(pdb_codes, fragments_db_filename):
for pdb_code in pdb_codes:
for fragment in fragmentsdb.by_pdb_code(pdb_code):
fragments.append(fragment)
return pd.DataFrame(fragments)

df = pd.DataFrame(fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
return df

def fragments_by_id(fragment_ids, fragments_db_filename):

def fragments_by_id(fragment_ids, fragments_db_filename, prefix=''):
"""Retrieve fragments based on fragment identifier.
Args:
fragment_ids (List[str]): List of fragment identifiers
fragments_db_filename (str): Filename of fragments db
prefix (str): Prefix for output columns
Examples:
Fetch fragments of '2n2k' PDB code
Fetch fragments of '2n2k_MTN_frag1' fragment identifier
>>> from kripodb.canned import fragments_by_id
>>> fragment_ids = pd.Series(['2n2k_MTN_frag1'])
Expand All @@ -110,4 +115,6 @@ def fragments_by_id(fragment_ids, fragments_db_filename):
"""
fragmentsdb = FragmentsDb(fragments_db_filename)
fragments = [fragmentsdb[frag_id] for frag_id in fragment_ids]
return pd.DataFrame(fragments)
df = pd.DataFrame(fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
return df
38 changes: 38 additions & 0 deletions tests/test_canned.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,25 @@ def test_fragments_by_pdb_codes():
assert_frame_equal(result, pd.DataFrame(expected))


def test_fragments_by_pdb_codes_with_prefix():
    """Fragments fetched by PDB code carry the given prefix on each column name."""
    codes = pd.Series(['3wxj'])

    frame = fragments_by_pdb_codes(codes, 'data/fragments.sqlite', 'prefix_')

    # The molecule column is left out of the comparison; its absence is tolerated.
    frame.drop('prefix_mol', axis=1, inplace=True, errors='ignore')

    # Key order is kept as-is because it can determine the expected column order.
    expected_row = {
        'prefix_nr_r_groups': 0,
        'prefix_smiles': 'O=P([O-])([O-])OCC(O)CO',
        'prefix_pdb_code': '3wxj',
        'prefix_atom_codes': 'O1,C1,C2,O2,C3,O1P,O4P,O2P,O3P,P',
        'prefix_het_code': 'G3P',
        'prefix_hash_code': 'ee9013689ff298d4',
        'prefix_frag_nr': 1,
        'prefix_frag_id': '3wxj_G3P_frag1',
        'prefix_rowid': 352104,
        'prefix_het_chain': 'B',
        'prefix_het_seq_nr': 601,
        'prefix_prot_chain': 'B',
        'prefix_uniprot_acc': 'D3KVM3',
        'prefix_uniprot_name': None,
        'prefix_prot_name': 'Glycerol kinase',
        'prefix_ec_number': '2.7.1.30',
        'prefix_pdb_title': 'Crystal structure of trypanosoma brucei gambiense glycerol kinase in complex with glycerol 3-phosphate',
    }
    assert_frame_equal(frame, pd.DataFrame([expected_row]))


def test_fragments_by_id():
frag_ids = pd.Series(['2n2k_MTN_frag1'])

Expand All @@ -87,4 +106,23 @@ def test_fragments_by_id():
'ec_number': None,
'pdb_title': 'Ensemble structure of the closed state of Lys63-linked diubiquitin in the absence of a ligand',
}]
assert_frame_equal(result, pd.DataFrame(expected))


def test_fragments_by_id_with_prefix():
    """Fragments fetched by identifier carry the given prefix on each column name."""
    identifiers = pd.Series(['2n2k_MTN_frag1'])

    frame = fragments_by_id(identifiers, 'data/fragments.sqlite', 'prefix_')

    # The molecule column is left out of the comparison; it must be present to be dropped.
    frame.drop('prefix_mol', axis=1, inplace=True)

    # Key order is kept as-is because it can determine the expected column order.
    expected_row = {
        'prefix_nr_r_groups': 0,
        'prefix_smiles': 'CC1(C)C=C(C[S-])C(C)(C)[NH+]1O',
        'prefix_pdb_code': '2n2k',
        'prefix_atom_codes': 'O1,N1,C1,C2,C3,C4,S1,C5,C6,C7,C8,C9',
        'prefix_het_code': 'MTN',
        'prefix_hash_code': 'd491952cd7c9dc30',
        'prefix_frag_nr': 1,
        'prefix_frag_id': '2n2k_MTN_frag1',
        'prefix_rowid': 175992,
        'prefix_het_chain': 'A',
        'prefix_het_seq_nr': 101,
        'prefix_prot_chain': 'A',
        'prefix_uniprot_acc': 'P0CG48',
        'prefix_uniprot_name': 'Polyubiquitin-C',
        'prefix_prot_name': 'ubiquitin',
        'prefix_ec_number': None,
        'prefix_pdb_title': 'Ensemble structure of the closed state of Lys63-linked diubiquitin in the absence of a ligand',
    }
    assert_frame_equal(frame, pd.DataFrame([expected_row]))

0 comments on commit 955b4bf

Please sign in to comment.