Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable setting test size individually for each system #267

Merged
merged 7 commits into from
Oct 19, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ dist
.eggs
_version.py
venv*
.vscode/**
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,10 @@ Since we do not have virial data, the virial prefactors `start_pref_v` and `limi
An example of `training` is
```json
"training" : {
"systems": ["../data/"],
"systems": ["../data1/", "../data2/"],
"set_prefix": "set",
"stop_batch": 1000000,
"_comment": " batch_size can be supplied with, e.g. 1, or auto (string) or [10, 20]"
amcadmus marked this conversation as resolved.
Show resolved Hide resolved
"batch_size": 1,

"seed": 1,
Expand All @@ -409,6 +410,7 @@ An example of `training` is
"_comment": " frequencies counted in batch",
"disp_file": "lcurve.out",
"disp_freq": 100,
"_comment": " numb_test can be supplied with, e.g. 1, or XX% (string) or [10, 20]"
amcadmus marked this conversation as resolved.
Show resolved Hide resolved
"numb_test": 10,
"save_freq": 1000,
"save_ckpt": "model.ckpt",
Expand All @@ -422,9 +424,10 @@ An example of `training` is
```
The option **`systems`** provide location of the systems (path to `set.*` and `type.raw`). It is a vector, thus DeePMD-kit allows you to provide multiple systems. DeePMD-kit will train the model with the systems in the vector one by one in a cyclic manner. **It is warned that the example water data (in folder `examples/data/water`) is of very limited amount, is provided only for testing purpose, and should not be used to train a productive model.**

The option **`batch_size`** specifies the number of frames in each batch. It can be set to `"auto"` to enable a automatic batch size.
The option **`batch_size`** specifies the number of frames in each batch. It can be set to `"auto"` to enable an automatic batch size, or it can be input as a list setting the batch size individually for each system.
The option **`stop_batch`** specifies the total number of batches will be used in the training.

The option **`numb_test`** specifies the number of tests that will be used for each system. If it is an integer, each system will be tested with the same number of tests. It can be set to a percentage `"XX%"` to use XX% of the frames of each system for its testing, or it can be input as a list setting the number of tests individually for each system (the order should correspond to the ordering of the `systems` key in the json).

### Training

Expand Down
1 change: 1 addition & 0 deletions source/tests/test_deepmd_data_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def test_get_test(self):
-
data['null']
), 0.0)

sys_idx = 2
data = ds.get_test(sys_idx=sys_idx)
self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
Expand Down
66 changes: 55 additions & 11 deletions source/train/DataSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,29 @@ def __init__ (self,
type_map_list.append(self.data_systems[ii].get_type_map())
self.type_map = self._check_type_map_consistency(type_map_list)

# ! altered by Marián Rynik
# test size
# now test size can be set as a percentage of systems data or test size
# can be set for each system individualy in the same manner as batch
# size. This enables one to use systems with diverse number of
# structures and different number of atoms.
self.test_size = test_size
if isinstance(self.test_size, int):
self.test_size = self.test_size * np.ones(self.nsystems, dtype=int)
elif isinstance(self.test_size, str):
words = self.test_size.split('%')
try:
percent = int(words[0])
except ValueError:
raise RuntimeError('unknown test_size rule ' + words[0])
self.test_size = self._make_auto_ts(percent)
elif isinstance(self.test_size, list):
pass
else :
raise RuntimeError('invalid test_size')
assert(isinstance(self.test_size, (list,np.ndarray)))
assert(len(self.test_size) == self.nsystems)

# prob of batch, init pick idx
self.prob_nbatches = [ float(i) for i in self.nbatches] / np.sum(self.nbatches)
self.pick_idx = 0
Expand All @@ -75,10 +98,10 @@ def __init__ (self,
if chk_ret is not None :
warnings.warn("system %s required batch size is larger than the size of the dataset %s (%d > %d)" % \
(self.system_dirs[ii], chk_ret[0], self.batch_size[ii], chk_ret[1]))
chk_ret = self.data_systems[ii].check_test_size(test_size)
chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii])
if chk_ret is not None :
warnings.warn("system %s required test size is larger than the size of the dataset %s (%d > %d)" % \
(self.system_dirs[ii], chk_ret[0], test_size, chk_ret[1]))
(self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1]))


def _load_test(self, ntests = -1):
Expand Down Expand Up @@ -207,24 +230,34 @@ def get_batch (self,
b_data["default_mesh"] = self.default_mesh[self.pick_idx]
return b_data

# ! altered by Marián Rynik
def get_test(self,
             sys_idx=None,
             n_test=-1):
    """Return the test data of one system as a dict of arrays.

    Parameters
    ----------
    sys_idx : int or None
        Index of the system to fetch. When None, the system currently
        selected by ``self.pick_idx`` is used.
    n_test : int
        Number of test frames to load on the first call (forwarded to
        ``_load_test``); -1 loads the default amount. Ignored once the
        test data has been cached on ``self.test_data``.

    Returns
    -------
    dict
        Per-system test arrays plus the system's ``natoms_vec`` and
        ``default_mesh`` entries.
    """
    # lazily build the mesh and load the test frames only once;
    # subsequent calls reuse the cached attributes
    if not hasattr(self, 'default_mesh'):
        self._make_default_mesh()
    if not hasattr(self, 'test_data'):
        self._load_test(ntests=n_test)
    if sys_idx is not None:
        idx = sys_idx
    else:
        idx = self.pick_idx

    test_system_data = {}
    for nn in self.test_data:
        test_system_data[nn] = self.test_data[nn][idx]
    test_system_data["natoms_vec"] = self.natoms_vec[idx]
    test_system_data["default_mesh"] = self.default_mesh[idx]
    return test_system_data

def get_sys_ntest(self, sys_idx=None):
"""Get number of tests for the currently selected system,
or one defined by sys_idx."""
if sys_idx is not None :
return self.test_size[sys_idx]
else :
return self.test_size[self.pick_idx]

def get_type_map(self):
return self.type_map
Expand Down Expand Up @@ -261,20 +294,21 @@ def print_summary(self,
# width 65
sys_width = 42
tmp_msg += "---Summary of DataSystem------------------------------------------------\n"
tmp_msg += "find %d system(s):\n" % self.nsystems
tmp_msg += "found %d system(s):\n" % self.nsystems
tmp_msg += "%s " % self._format_name_length('system', sys_width)
tmp_msg += "%s %s %s %5s\n" % ('natoms', 'bch_sz', 'n_bch', 'prob')
tmp_msg += "%s %s %s %s %5s\n" % ('natoms', 'bch_sz', 'n_bch', "n_test", 'prob')
marian-code marked this conversation as resolved.
Show resolved Hide resolved
for ii in range(self.nsystems) :
tmp_msg += ("%s %6d %6d %5d %5.3f\n" %
tmp_msg += ("%s %6d %6d %6d %6d %5.3f\n" %
(self._format_name_length(self.system_dirs[ii], sys_width),
self.natoms[ii],
self.batch_size[ii],
self.nbatches[ii],
# TODO batch size * nbatches = number of structures
self.batch_size[ii],
self.nbatches[ii],
self.test_size[ii],
prob[ii]) )
tmp_msg += "------------------------------------------------------------------------\n"
run_opt.message(tmp_msg)


def _make_auto_bs(self, rule) :
bs = []
for ii in self.data_systems:
Expand All @@ -285,6 +319,16 @@ def _make_auto_bs(self, rule) :
bs.append(bsi)
return bs

# ! added by Marián Rynik
def _make_auto_ts(self, percent):
ts = []
for ii in range(self.nsystems):
ni = self.batch_size[ii] * self.nbatches[ii]
tsi = int(ni * percent / 100)
ts.append(tsi)

return ts

def _check_type_map_consistency(self, type_map_list):
ret = []
for ii in type_map_list:
Expand Down
23 changes: 15 additions & 8 deletions source/train/Trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,9 @@ def _init_param(self, jdata):
# training
training_param = j_must_have(jdata, 'training')

# ! first .add() altered by Marián Rynik
tr_args = ClassArg()\
.add('numb_test', int, default = 1)\
.add('numb_test', [int, list, str], default = 1)\
.add('disp_file', str, default = 'lcurve.out')\
.add('disp_freq', int, default = 100)\
.add('save_freq', int, default = 1000)\
Expand All @@ -182,7 +183,8 @@ def _init_param(self, jdata):
.add('sys_probs', list )\
.add('auto_prob_style', str, default = "prob_sys_size")
tr_data = tr_args.parse(training_param)
self.numb_test = tr_data['numb_test']
# not needed
# self.numb_test = tr_data['numb_test']
self.disp_file = tr_data['disp_file']
self.disp_freq = tr_data['disp_freq']
self.save_freq = tr_data['save_freq']
Expand Down Expand Up @@ -458,17 +460,24 @@ def test_on_the_fly (self,
fp,
data,
feed_dict_batch) :
test_data = data.get_test(ntests = self.numb_test)
# ! altered by Marián Rynik
# Do not need to pass numb_test here as data object already knows it.
# Both DeepmdDataSystem and ClassArg parse the same json file
test_data = data.get_test(n_test=data.get_sys_ntest())
feed_dict_test = {}
for kk in test_data.keys():
if kk == 'find_type' or kk == 'type' :
continue
if 'find_' in kk:
feed_dict_test[self.place_holders[kk]] = test_data[kk]
else:
feed_dict_test[self.place_holders[kk]] = np.reshape(test_data[kk][:self.numb_test], [-1])
# ! altered by Marián Rynik
# again the data object knows appropriate test data shape,
# there is no need to slice again!
# feed_dict_test[self.place_holders[kk]] = np.reshape(test_data[kk][:self.numb_test[data.pick_idx]], [-1])
feed_dict_test[self.place_holders[kk]] = np.reshape(test_data[kk], [-1])
for ii in ['type'] :
feed_dict_test[self.place_holders[ii]] = np.reshape(test_data[ii][:self.numb_test], [-1])
feed_dict_test[self.place_holders[ii]] = np.reshape(test_data[ii], [-1])
for ii in ['natoms_vec', 'default_mesh'] :
feed_dict_test[self.place_holders[ii]] = test_data[ii]
feed_dict_test[self.place_holders['is_training']] = False
Expand All @@ -483,6 +492,4 @@ def test_on_the_fly (self,
feed_dict_batch)
print_str += " %8.1e\n" % current_lr
fp.write(print_str)
fp.flush ()


fp.flush ()
18 changes: 12 additions & 6 deletions source/train/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# """
# cdf = 0.5 * (1.0 + tf.tanh((math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3)))))
# return x * cdf
def gelu(x):
    """Gaussian Error Linear Unit activation, evaluated by the custom
    compiled op (``op_module.gelu``) rather than the commented-out
    tanh approximation above."""
    return op_module.gelu(x)

data_requirement = {}
Expand Down Expand Up @@ -110,11 +110,17 @@ def add (self,
def _add_single(self, key, data) :
vtype = type(data)
if not(vtype in self.arg_dict[key]['types']) :
# try the type convertion to the first listed type
try :
vv = (self.arg_dict[key]['types'][0])(data)
except TypeError:
raise TypeError ("cannot convert provided key \"%s\" to type %s " % (key, str(self.arg_dict[key]['types'][0])) )
# ! altered by Marián Rynik
# try the type convertion to one of the types
for tp in self.arg_dict[key]['types']:
try :
vv = tp(data)
except TypeError:
pass
else:
break
else:
raise TypeError ("cannot convert provided key \"%s\" to type(s) %s " % (key, str(self.arg_dict[key]['types'])) )
else :
vv = data
self.arg_dict[key]['value'] = vv
Expand Down