forked from deepmodeling/deepks-kit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_xyz.py
161 lines (139 loc) · 5.65 KB
/
convert_xyz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import numpy as np
from glob import glob
BOHR = 0.52917721092
ELEMENTS = ['X', # Ghost
'H' , 'He', 'Li', 'Be', 'B' , 'C' , 'N' , 'O' , 'F' , 'Ne',
'Na', 'Mg', 'Al', 'Si', 'P' , 'S' , 'Cl', 'Ar', 'K' , 'Ca',
'Sc', 'Ti', 'V' , 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y' , 'Zr',
'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn',
'Sb', 'Te', 'I' , 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',
'Lu', 'Hf', 'Ta', 'W' , 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th',
'Pa', 'U' , 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og',
]
CHARGES = dict(((x,i) for i,x in enumerate(ELEMENTS)))
def parse_xyz(filename):
with open(filename) as fp:
natom = int(fp.readline())
comments = fp.readline().strip()
atom_str = fp.readlines()
atom_list = [a.split() for a in atom_str if a.strip()]
elements = [a[0] for a in atom_list]
coords = np.array([a[1:] for a in atom_list], dtype=float)
return natom, comments, elements, coords
def parse_unit(rawunit):
if isinstance(rawunit, str):
try:
unit = float(rawunit)
except ValueError:
if rawunit.upper().startswith(('B', 'AU')):
unit = BOHR
else: #unit[:3].upper() == 'ANG':
unit = 1.
else:
unit = rawunit
return unit
def load_array(file):
ext = os.path.splitext(file)[-1]
if "npy" in ext:
return np.load(file)
elif "npz" in ext:
raise NotImplementedError
else:
try:
arr = np.loadtxt(file)
except ValueError:
arr = np.loadtxt(file, dtype=str)
return arr
def load_glob(pattern):
[fn] = glob(pattern)
return load_array(fn)
def load_system(xyz_file):
base, ext = os.path.splitext(xyz_file)
assert ext == '.xyz'
natom, _, ele, coord = parse_xyz(xyz_file)
try:
energy = load_glob(f"{base}.energy*").reshape(1)
except:
energy = None
try:
force = load_glob(f"{base}.force*").reshape(natom, 3)
except:
force = None
try:
dm = load_glob(f"{base}.dm*")
nao = np.sqrt(dm.size).astype(int)
dm = dm.reshape(nao, nao)
except:
dm = None
return ele, coord, energy, force, dm
def dump_systems(xyz_files, dump_dir, unit="Bohr", ext_type=False):
print(f"saving to {dump_dir} ... ", end="", flush=True)
os.makedirs(dump_dir, exist_ok=True)
if not xyz_files:
print("empty list! did nothing")
return
unit = parse_unit(unit)
a_ele, a_coord, a_energy, a_force, a_dm = map(np.array,
zip(*[load_system(fl) for fl in xyz_files]))
a_coord /= unit
if ext_type:
ele = a_ele[0]
assert all(e == ele for e in a_ele), "element type for each xyz file has to be the same"
np.savetxt(os.path.join(dump_dir, "type.raw"), ele, fmt="%s")
np.save(os.path.join(dump_dir, "coord.npy"), a_coord)
else:
a_chg = [[[CHARGES[e]] for e in ele] for ele in a_ele]
a_atom = np.concatenate([a_chg, a_coord], axis=-1)
np.save(os.path.join(dump_dir, "atom.npy"), a_atom)
if not all(ene is None for ene in a_energy):
assert not any(ele is None for ele in a_energy)
np.save(os.path.join(dump_dir, "energy.npy"), a_energy)
if not all(ff is None for ff in a_force):
assert not any(ff is None for ff in a_force)
a_force *= unit
np.save(os.path.join(dump_dir, "force.npy"), a_force)
if not all(dm is None for dm in a_dm):
assert not any(dm is None for dm in a_dm)
np.save(os.path.join(dump_dir, "dm.npy"), a_dm)
print(f"finished", flush=True)
return
def main(xyz_files, dump_dir=".", group_size=-1, group_prefix="sys", unit="Bohr", ext_type=False):
if isinstance(xyz_files, str):
xyz_files = [xyz_files]
if group_size <= 0:
dump_systems(xyz_files, dump_dir, unit=unit, ext_type=ext_type)
return
ns = len(xyz_files)
ngroup = np.ceil(ns / group_size).astype(int)
nd = max(len(str(ngroup)), 2)
for i in range(ngroup):
dump_systems(xyz_files[i*group_size:(i+1)*group_size],
os.path.join(dump_dir, f"{group_prefix}.{i:0>{nd}d}"),
unit=unit, ext_type=ext_type)
return
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description="convert .xyz files and corresponding properties "
"into systems with .npy files grouped in folders.",
argument_default=argparse.SUPPRESS)
parser.add_argument("xyz_files", metavar='FILE', nargs="+",
help="input xyz files")
parser.add_argument("-d", "--dump-dir",
help="directory of dumped system, default is current dir")
parser.add_argument("-U", "--unit",
help="length unit used to save npy files (assume xyz in Angstrom)")
parser.add_argument("-G", "--group-size", type=int,
help="if positive, split data into sub systems with given size, default: -1")
parser.add_argument("-P", "--group-prefix",
help=r"save sub systems with given prefix as `$dump_dir/$prefix.ii`, default: sys")
parser.add_argument("-T", "--ext-type", action="store_true",
help="if set, save the element type into separete `type.raw` file")
args = parser.parse_args()
main(**vars(args))