-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathe_above_hull.py
164 lines (129 loc) · 5.47 KB
/
e_above_hull.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""
Copyright (c) Facebook, Inc. and its affiliates.
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
"""
Build a PatchedPhaseDiagram from all MP ComputedStructureEntries for calculating
DFT-ground truth convex hull energies.
"""
import warnings
import tempfile
from pymatgen.core import Structure
import numpy as np
import pandas as pd
from pymatgen.analysis.phase_diagram import PatchedPhaseDiagram
from pymatgen.entries.compatibility import MaterialsProject2020Compatibility
from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
from tqdm import tqdm
from pymatgen.io.vasp.inputs import Incar, Poscar
from pymatgen.io.vasp.sets import MPRelaxSet
import tqdm
import pandas as pd
from m3gnet.models import Relaxer
from pymatgen.core.structure import Structure
from crystal import cif_str_to_crystal
def m3gnet_relaxed_energy(cif_str):
structure = Structure.from_str(cif_str, fmt="cif")
relaxer = Relaxer() # This loads the default pre-trained model
relax_results = relaxer.relax(structure)
final_structure = relax_results['final_structure']
final_energy_per_atom = float(relax_results['trajectory'].energies[-1])
return final_energy_per_atom, final_structure
from timeout_decorator import timeout
@timeout(30)
def call_m3gnet_relaxed_energy(cif_str):
return m3gnet_relaxed_energy(cif_str)
def label_energies(filename):
df = pd.read_csv(filename)
new_df = []
for r in tqdm.tqdm(df.to_dict('records')):
cif_str = r['cif']
crystal = cif_str_to_crystal(cif_str)
if crystal is None or not crystal.valid:
continue
structure = Structure.from_str(cif_str, fmt="cif")
if len(structure) == 1:
continue
try:
e, relaxed_s = call_m3gnet_relaxed_energy(cif_str)
r['m3gnet_relaxed_energy'] = e
r['m3gnet_relaxed_cif'] = relaxed_s.to(fmt="cif")
except Exception as e:
continue
new_df.append(r)
new_df = pd.DataFrame(new_df)
new_filename = filename.replace(".csv","") + "_m3gnet_relaxed_energy.csv"
new_df.to_csv(new_filename)
return new_df
def generate_CSE(structure, m3gnet_energy):
# Write VASP inputs files as if we were going to do a standard MP run
# this is mainly necessary to get the right U values / etc
b = MPRelaxSet(structure)
with tempfile.TemporaryDirectory() as tmpdirname:
b.write_input(f"{tmpdirname}/", potcar_spec=True)
poscar = Poscar.from_file(f"{tmpdirname}/POSCAR")
incar = Incar.from_file(f"{tmpdirname}/INCAR")
clean_structure = Structure.from_file(f"{tmpdirname}/POSCAR")
# Get the U values and figure out if we should have run a GGA+U calc
param = {"hubbards": {}}
if "LDAUU" in incar:
param["hubbards"] = dict(zip(poscar.site_symbols, incar["LDAUU"]))
param["is_hubbard"] = (
incar.get("LDAU", True) and sum(param["hubbards"].values()) > 0
)
if param["is_hubbard"]:
param["run_type"] = "GGA+U"
# Make a ComputedStructureEntry without the correction
cse_d = {
"structure": clean_structure,
"energy": m3gnet_energy,
"correction": 0.0,
"parameters": param,
}
# Apply the MP 2020 correction scheme (anion/+U/etc)
cse = ComputedStructureEntry.from_dict(cse_d)
_ = MaterialsProject2020Compatibility(check_potcar=False).process_entries(
cse,
clean=True,
)
# Return the final CSE (notice that the composition/etc is also clean, not things like Fe3+)!
return cse
def get_e_above_hull(structures_fn, entries_fn):
print(f"Loading MP ComputedStructureEntries from {entries_fn}")
df = pd.read_json(entries_fn)
#filter to only df entries that contain the substring "GGA" in the column 'index'
df = df[df['index'].str.contains("GGA")]
print(len(df))
mp_computed_entries = [ComputedEntry.from_dict(x) for x in tqdm(df.entry) if 'GGA' in x['parameters']['run_type']]
mp_computed_entries = [entry for entry in mp_computed_entries if not np.any(['R2SCAN' in a.name for a in entry.energy_adjustments])]
ppd_mp = PatchedPhaseDiagram(mp_computed_entries, verbose=True)
df = pd.read_csv(fn)
new_df = []
for d in tqdm(df.to_dict(orient="records")):
try:
structure = Structure.from_str(d["m3gnet_relaxed_cif"], fmt="cif")
energy = d["m3gnet_relaxed_energy"]
cse = generate_CSE(structure, energy)
e_above_hull = ppd_mp.get_e_above_hull(cse, allow_negative=True)
d["e_above_hull"] = e_above_hull
new_df.append(d)
except Exception as e:
print(e)
continue
new_df = pd.DataFrame(new_df)
new_df.to_csv("e_above_hull_llama_70b.csv", index=False)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--structures_fn", type=str, default="data/llm/relaxations/relaxations.csv")
parser.add_argument("--entries_fn", type=str, default="/private/home/ngruver/ocp-modeling-dev/llm/2023-07-13-mp-computed-structure-entries.json.gz")
args = parser.parse_args()
#suppress tensorflow warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
new_fn = label_energies(args.structures_fn, args.entries_fn)
warnings.filterwarnings("ignore")
get_e_above_hull(new_fn)