refactor simplify #730

Merged · 7 commits · Jun 9, 2022 · Changes from 6 commits
1 change: 0 additions & 1 deletion README.md
@@ -501,7 +501,6 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
 | init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories |
 | ***init_data_sys*** | List of string | ["CH4.POSCAR.01x01x01/.../deepmd"] | Directories of initial data. You may use either absolute or relative paths here. |
 | ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. |
-| init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as initial data systems. |
 | init_batch_size | String of integer | [8] | Each number is the batch_size of the corresponding system for training in `init_data_sys`. One recommended rule for setting `sys_batch_size` and `init_batch_size` is that `batch_size` multiplied by the number of atoms of the structure should be larger than 32. If set to `auto`, the batch size will be 32 divided by the number of atoms. |
 | sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` |
 | **sys_configs** | List of list of string | [<br />["/sharedext4/.../POSCAR"], <br />["....../POSCAR"]<br />] | Directories of structures to be explored in iterations. Wildcard characters are supported here. |
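The `auto` rule in the `init_batch_size` row is compact enough to state in code. Below is a minimal sketch of a resolver implementing it; `detect_batch_size` is a helper name that appears in the run.py diff further down, but this body is an assumption reconstructed from the documented rule, not the verbatim dpgen implementation:

```python
import dpdata

def detect_batch_size(batch_size, system_path):
    # Integers are taken as-is; "auto" applies the documented rule:
    # 32 divided by the number of atoms, so that
    # batch_size * natoms reaches 32. (Assumed reconstruction.)
    if isinstance(batch_size, int):
        return batch_size
    if batch_size == "auto":
        natoms = dpdata.LabeledSystem(system_path, fmt="deepmd/npy").get_natoms()
        return max(1, 32 // natoms)
    raise RuntimeError("unsupported init_batch_size setting: %s" % batch_size)
```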
70 changes: 25 additions & 45 deletions dpgen/generator/run.py
@@ -60,7 +60,7 @@
 from dpgen.generator.lib.ele_temp import NBandsEsti
 from dpgen.remote.decide_machine import convert_mdata
 from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission
-from dpgen.util import sepline
+from dpgen.util import sepline, expand_sys_str
 from dpgen import ROOT_PATH
 from pymatgen.io.vasp import Incar,Kpoints,Potcar
 from dpgen.auto_test.lib.vasp import make_kspacing_kpoints
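`expand_sys_str` (imported above from `dpgen.util`) is what lets the rewritten code treat single- and multi-system directories uniformly. A plausible sketch, assuming it recursively collects every directory holding a deepmd-format system, i.e. one containing a `type.raw` file; the actual body lives in `dpgen/util.py` and may differ in detail:

```python
from pathlib import Path
from typing import List, Union

def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
    # Assumed contract: every directory under root_dir (root_dir
    # included) that contains a type.raw file is a deepmd system.
    root_dir = Path(root_dir)
    matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()]
    if (root_dir / "type.raw").is_file():
        matches.append(str(root_dir))
    return matches
```

Under this contract a plain system directory yields a one-element list and a multi-system tree yields all of its leaves, which is why the separate `init_multi_systems` and `use_clusters` branches below can be deleted.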
@@ -287,13 +287,10 @@ def make_train (iter_index,
     # make sure all init_data_sys has the batch size -- for the following `zip`
     assert (len(init_data_sys_) <= len(init_batch_size_))
     for ii, ss in zip(init_data_sys_, init_batch_size_) :
-        if jdata.get('init_multi_systems', False):
-            for single_sys in os.listdir(os.path.join(work_path, 'data.init', ii)):
-                init_data_sys.append(os.path.join('..', 'data.init', ii, single_sys))
-                init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii, single_sys)))
-        else:
-            init_data_sys.append(os.path.join('..', 'data.init', ii))
-            init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii)))
+        sys_paths = expand_sys_str(os.path.join(init_data_prefix, ii))
+        for single_sys in sys_paths:
+            init_data_sys.append(os.path.normpath(os.path.join('..', 'data.init', ii, os.path.relpath(single_sys, os.path.join(init_data_prefix, ii)))))
+            init_batch_size.append(detect_batch_size(ss, single_sys))
     old_range = None
     if iter_index > 0 :
         for ii in range(iter_index) :
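The `relpath`/`normpath` pair in the new branch maps each on-disk system back into the task-relative `../data.init/...` layout. A worked example of the path arithmetic, with hypothetical path values:

```python
import os

init_data_prefix = "/sharedext4/init/data"   # hypothetical
ii = "CH4.POSCAR.01x01x01"                   # one init_data_sys entry
single_sys = os.path.join(init_data_prefix, ii, "02.md/sys-0004/deepmd")

rel = os.path.relpath(single_sys, os.path.join(init_data_prefix, ii))
print(rel)  # 02.md/sys-0004/deepmd
print(os.path.normpath(os.path.join('..', 'data.init', ii, rel)))
# ../data.init/CH4.POSCAR.01x01x01/02.md/sys-0004/deepmd
```

When `expand_sys_str` returns the directory itself, `rel` is `'.'` and `normpath` collapses the trailing `/.`, so the old single-system behavior falls out as a special case.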
@@ -307,25 +304,16 @@
             sys_batch_size = ["auto" for aa in range(len(sys_list))]
             for jj in fp_data_sys :
                 sys_idx = int(jj.split('.')[-1])
-                if jdata.get('use_clusters', False):
-                    nframes = 0
-                    for sys_single in os.listdir(jj):
-                        tmp_box = np.loadtxt(os.path.join(jj, sys_single, 'box.raw'))
-                        tmp_box = np.reshape(tmp_box, [-1,9])
-                        nframes += tmp_box.shape[0]
-                    if nframes < fp_task_min :
-                        log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj))
-                        continue
-                    for sys_single in os.listdir(jj):
-                        init_data_sys.append(os.path.join('..', 'data.iters', jj, sys_single))
-                        init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], os.path.join(jj, sys_single)))
-                else:
-                    nframes = dpdata.System(jj, 'deepmd/npy').get_nframes()
-                    if nframes < fp_task_min :
-                        log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj))
-                        continue
-                    init_data_sys.append(os.path.join('..', 'data.iters', jj))
-                    init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], jj))
+                sys_paths = expand_sys_str(jj)
+                nframes = 0
+                for sys_single in sys_paths:
+                    nframes += dpdata.LabeledSystem(sys_single, fmt="deepmd/npy").get_nframes()
+                if nframes < fp_task_min :
+                    log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj))
+                    continue
+                for sys_single in sys_paths:
+                    init_data_sys.append(os.path.normpath(os.path.join('..', 'data.iters', sys_single)))
+                    init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], sys_single))
     # establish tasks
     jinput = jdata['default_training_param']
     try:
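The unified branch counts frames with `dpdata.LabeledSystem` instead of parsing `box.raw` by hand (old `use_clusters` path) or loading an unlabeled `dpdata.System` (old default path), so frame counting no longer depends on the raw-file layout. A small sketch of the guard, assuming `fp_task_min` and a list of system paths as inputs:

```python
import dpdata

def enough_frames(sys_paths, fp_task_min):
    # Sum labeled frames over all sub-systems of one data.* directory;
    # the whole directory is skipped if the total is below fp_task_min.
    nframes = sum(dpdata.LabeledSystem(p, fmt="deepmd/npy").get_nframes()
                  for p in sys_paths)
    return nframes >= fp_task_min
```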
@@ -567,25 +555,17 @@ def run_train (iter_index,
     os.chdir(work_path)
     fp_data = glob.glob(os.path.join('data.iters', 'iter.*', '02.fp', 'data.*'))
     for ii in init_data_sys :
-        if jdata.get('init_multi_systems', False):
-            for single_sys in os.listdir(os.path.join(ii)):
-                trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*'))
-                trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw'))
-                trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc'))
-        else:
-            trans_comm_data += glob.glob(os.path.join(ii, 'set.*'))
-            trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw'))
-            trans_comm_data += glob.glob(os.path.join(ii, 'nopbc'))
+        sys_paths = expand_sys_str(ii)
+        for single_sys in sys_paths:
+            trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*'))
+            trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw'))
+            trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc'))
     for ii in fp_data :
-        if jdata.get('use_clusters', False):
-            for single_sys in os.listdir(os.path.join(ii)):
-                trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*'))
-                trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw'))
-                trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc'))
-        else:
-            trans_comm_data += glob.glob(os.path.join(ii, 'set.*'))
-            trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw'))
-            trans_comm_data += glob.glob(os.path.join(ii, 'nopbc'))
+        sys_paths = expand_sys_str(ii)
+        for single_sys in sys_paths:
+            trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*'))
+            trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw'))
+            trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc'))
     os.chdir(cwd)

     try:
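After the rewrite, the `init_data_sys` and `fp_data` loops in `run_train` are identical except for their inputs. A hypothetical helper showing how the remaining duplication could be factored further (not part of this PR):

```python
import glob
import os

def collect_sys_files(sys_dir):
    # Files a deepmd training task needs from one system directory:
    # the set.* folders, the type map files, and the optional nopbc marker.
    files = []
    for pattern in ('set.*', 'type*.raw', 'nopbc'):
        files += glob.glob(os.path.join(sys_dir, pattern))
    return files
```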