1
- import os
1
+ from pathlib import Path
2
2
3
3
import numpy as np
4
- import pytest
4
+ from conftest import ASSETS
5
5
from scipy .stats import wasserstein_distance
6
6
from sklearn .decomposition import KernelPCA
7
7
8
- import qmllib
9
8
from qmllib .kernels import (
10
9
gaussian_kernel ,
11
10
gaussian_kernel_symmetric ,
17
16
sargan_kernel ,
18
17
wasserstein_kernel ,
19
18
)
19
+ from qmllib .representations import generate_bob
20
+ from qmllib .utils .xyz_format import read_xyz
20
21
21
22
22
- def get_energies (filename ):
23
+ def get_energies (filename : Path ):
23
24
"""Returns a dictionary with heats of formation for each xyz-file."""
24
25
25
- f = open (filename , "r" )
26
- lines = f .readlines ()
27
- f .close ()
26
+ with open (filename , "r" ) as f :
27
+ lines = f .readlines ()
28
28
29
29
energies = dict ()
30
30
@@ -239,16 +239,10 @@ def array_nan_close(a, b):
239
239
return np .allclose (a [m ], b [m ], atol = 1e-8 , rtol = 0.0 )
240
240
241
241
242
- @pytest .mark .skip (reason = "Removing all Compound classes" )
243
242
def test_kpca ():
244
243
245
- test_dir = os .path .dirname (os .path .realpath (__file__ ))
246
-
247
244
# Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
248
- data = get_energies (test_dir + "/data/hof_qm7.txt" )
249
-
250
- # Generate a list of qmllib.Compound() objects
251
- mols = []
245
+ data = get_energies (ASSETS / "hof_qm7.txt" )
252
246
253
247
keys = sorted (data .keys ())
254
248
@@ -257,17 +251,24 @@ def test_kpca():
257
251
258
252
n_mols = 100
259
253
254
+ representations = []
255
+
260
256
for xyz_file in keys [:n_mols ]:
261
257
262
- mol = qmllib .Compound (xyz = test_dir + "/qm7/" + xyz_file )
263
- mol .properties = data [xyz_file ]
264
- mol .generate_bob ()
265
- mols .append (mol )
258
+ filename = ASSETS / "qm7" / xyz_file
259
+ coordinates , atoms = read_xyz (filename )
266
260
267
- X = np .array ([mol .representation for mol in mols ])
261
+ atomtypes = np .unique (atoms )
262
+ representation = generate_bob (atoms , coordinates , atomtypes )
263
+ representations .append (representation )
264
+
265
+ X = np .array ([representation for representation in representations ])
268
266
K = laplacian_kernel (X , X , 2e5 )
269
267
268
+ # Calculate kernel PCA with kpca
270
269
pcas_qml = kpca (K , n = 10 )
270
+
271
+ # Calculate with sklearn
271
272
pcas_sklearn = KernelPCA (10 , eigen_solver = "dense" , kernel = "precomputed" ).fit_transform (K )
272
273
273
274
assert array_nan_close (
0 commit comments