From 2039b4342148862f41d116c8a335d7465ca4cae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3=A1n=20Rynik?=
Date: Wed, 7 Oct 2020 15:27:55 +0200
Subject: [PATCH 1/7] enable setting test size individually for each system

---
 .gitignore                 |  1 +
 README.md                  |  9 ++---
 source/train/DataSystem.py | 68 ++++++++++++++++++++++++++++++--------
 source/train/Trainer.py    | 23 ++++++++-----
 source/train/common.py     | 16 ++++++---
 5 files changed, 86 insertions(+), 31 deletions(-)

diff --git a/.gitignore b/.gitignore
index c41f53e077..435a560708 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ dist
 .eggs
 _version.py
 venv*
+.vscode/**
diff --git a/README.md b/README.md
index 5520daa57f..abf3fcc35a 100644
--- a/README.md
+++ b/README.md
@@ -398,10 +398,10 @@ Since we do not have virial data, the virial prefactors `start_pref_v` and `limi
 An example of `training` is
 ```json
     "training" : {
-    "systems": ["../data/"],
+    "systems": ["../data1/", "../data2/"],
     "set_prefix": "set",
     "stop_batch": 1000000,
-    "batch_size": 1,
+    "batch_size": 1, # or "auto" or [10, 20]

     "seed": 1,
@@ -409,7 +409,7 @@ An example of `training` is
     "_comment": " frequencies counted in batch",
     "disp_file": "lcurve.out",
     "disp_freq": 100,
-    "numb_test": 10,
+    "numb_test": 10, # or "XX%" or [10, 20]
     "save_freq": 1000,
     "save_ckpt": "model.ckpt",
     "load_ckpt": "model.ckpt",
@@ -422,9 +422,10 @@ An example of `training` is
 ```

 The option **`systems`** provide location of the systems (path to `set.*` and `type.raw`). It is a vector, thus DeePMD-kit allows you to provide multiple systems. DeePMD-kit will train the model with the systems in the vector one by one in a cyclic manner. **It is warned that the example water data (in folder `examples/data/water`) is of very limited amount, is provided only for testing purpose, and should not be used to train a productive model.**

-The option **`batch_size`** specifies the number of frames in each batch. It can be set to `"auto"` to enable a automatic batch size.
+The option **`batch_size`** specifies the number of frames in each batch. It can be set to `"auto"` to enable an automatic batch size, or it can be given as a list that sets the batch size individually for each system.
 The option **`stop_batch`** specifies the total number of batches will be used in the training.
+The option **`numb_test`** specifies the number of test frames that will be used for each system. If it is an integer, each system is tested with the same number of frames. It can be set to a percentage `"XX%"` to use XX% of the frames of each system for its testing, or it can be given as a list that sets the number of tests individually for each system (the order should correspond to the ordering of the `systems` key in the json).

 ### Training
diff --git a/source/train/DataSystem.py b/source/train/DataSystem.py
index 61d59f5cea..03971d196d 100644
--- a/source/train/DataSystem.py
+++ b/source/train/DataSystem.py
@@ -65,6 +65,29 @@ def __init__ (self,
             type_map_list.append(self.data_systems[ii].get_type_map())
         self.type_map = self._check_type_map_consistency(type_map_list)

+        # ! altered by Marián Rynik
+        # test size
+        # now test size can be set as a percentage of systems data or test size
+        # can be set for each system individually in the same manner as batch
+        # size. This enables one to use systems with diverse number of
+        # structures and different number of atoms.
+        self.test_size = test_size
+        if isinstance(self.test_size, int):
+            self.test_size = self.test_size * np.ones(self.nsystems, dtype=int)
+        elif isinstance(self.test_size, str):
+            words = self.test_size.split('%')
+            try:
+                percent = int(words[0])
+            except ValueError:
+                raise RuntimeError('unknown test_size rule ' + words[0])
+            self.test_size = self._make_auto_ts(percent)
+        elif isinstance(self.test_size, list):
+            pass
+        else :
+            raise RuntimeError('invalid test_size')
+        assert(isinstance(self.test_size, (list,np.ndarray)))
+        assert(len(self.test_size) == self.nsystems)
+
         # prob of batch, init pick idx
         self.prob_nbatches = [ float(i) for i in self.nbatches] / np.sum(self.nbatches)
         self.pick_idx = 0
@@ -75,10 +98,10 @@ def __init__ (self,
             if chk_ret is not None :
                 warnings.warn("system %s required batch size is larger than the size of the dataset %s (%d > %d)" % \
                               (self.system_dirs[ii], chk_ret[0], self.batch_size[ii], chk_ret[1]))
-            chk_ret = self.data_systems[ii].check_test_size(test_size)
+            chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii])
             if chk_ret is not None :
                 warnings.warn("system %s required test size is larger than the size of the dataset %s (%d > %d)" % \
-                              (self.system_dirs[ii], chk_ret[0], test_size, chk_ret[1]))
+                              (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1]))


     def _load_test(self, ntests = -1):
@@ -207,17 +230,23 @@ def get_batch (self,
         b_data["default_mesh"] = self.default_mesh[self.pick_idx]
         return b_data

+    # ! altered by Marián Rynik
     def get_test (self,
-                  sys_idx = None,
-                  ntests = -1) :
-        if not hasattr(self, 'default_mesh') :
-            self._make_default_mesh()
-        if not hasattr(self, 'test_data') :
-            self._load_test(ntests = ntests)
+                  sys_idx = None) :
+
+        # need to get idx first to get the appropriate test size for the
+        # current system
         if sys_idx is not None :
             idx = sys_idx
         else :
+            # idx get selected in get batch, it is the index of a system
             idx = self.pick_idx
+
+        if not hasattr(self, 'default_mesh') :
+            self._make_default_mesh()
+        if not hasattr(self, 'test_data') :
+            self._load_test(ntests = self.test_size[idx])
+
         test_system_data = {}
         for nn in self.test_data:
             test_system_data[nn] = self.test_data[nn][idx]
@@ -261,20 +290,21 @@ def print_summary(self,
         # width 65
         sys_width = 42
         tmp_msg += "---Summary of DataSystem------------------------------------------------\n"
-        tmp_msg += "find %d system(s):\n" % self.nsystems
+        tmp_msg += "found %d system(s):\n" % self.nsystems
         tmp_msg += "%s " % self._format_name_length('system', sys_width)
-        tmp_msg += "%s %s %s %5s\n" % ('natoms', 'bch_sz', 'n_bch', 'prob')
+        tmp_msg += "%s %s %s %s %5s\n" % ('natoms', 'bch_sz', 'n_bch', "n_test", 'prob')
         for ii in range(self.nsystems) :
-            tmp_msg += ("%s %6d %6d %5d %5.3f\n" %
+            tmp_msg += ("%s %6d %6d %6d %6d %5.3f\n" %
                         (self._format_name_length(self.system_dirs[ii], sys_width),
                          self.natoms[ii],
-                         self.batch_size[ii],
-                         self.nbatches[ii],
+                         # TODO batch size * nbatches = number of structures
+                         self.batch_size[ii],
+                         self.nbatches[ii],
+                         self.test_size[ii],
                          prob[ii]) )
         tmp_msg += "------------------------------------------------------------------------\n"
         run_opt.message(tmp_msg)
-
     def _make_auto_bs(self, rule) :
         bs = []
         for ii in self.data_systems:
@@ -285,6 +315,16 @@ def _make_auto_bs(self, rule) :
             bs.append(bsi)
         return bs

+    # ! added by Marián Rynik
+    def _make_auto_ts(self, percent):
+        ts = []
+        for ii in range(self.nsystems):
+            ni = self.batch_size[ii] * self.nbatches[ii]
+            tsi = int(ni * percent / 100)
+            ts.append(tsi)
+
+        return ts
+
     def _check_type_map_consistency(self, type_map_list):
         ret = []
         for ii in type_map_list:
diff --git a/source/train/Trainer.py b/source/train/Trainer.py
index b6428c987e..b5beb18ef6 100644
--- a/source/train/Trainer.py
+++ b/source/train/Trainer.py
@@ -169,8 +169,9 @@ def _init_param(self, jdata):

         # training
         training_param = j_must_have(jdata, 'training')
+        # ! first .add() altered by Marián Rynik
         tr_args = ClassArg()\
-                  .add('numb_test', int, default = 1)\
+                  .add('numb_test', [int, list, str], default = 1)\
                   .add('disp_file', str, default = 'lcurve.out')\
                   .add('disp_freq', int, default = 100)\
                   .add('save_freq', int, default = 1000)\
@@ -182,7 +183,8 @@ def _init_param(self, jdata):
                   .add('sys_probs', list )\
                   .add('auto_prob_style', str, default = "prob_sys_size")
         tr_data = tr_args.parse(training_param)
-        self.numb_test = tr_data['numb_test']
+        # not needed
+        # self.numb_test = tr_data['numb_test']
         self.disp_file = tr_data['disp_file']
         self.disp_freq = tr_data['disp_freq']
         self.save_freq = tr_data['save_freq']
@@ -458,7 +460,10 @@ def test_on_the_fly (self,
                          fp,
                          data,
                          feed_dict_batch) :
-        test_data = data.get_test(ntests = self.numb_test)
+        # ! altered by Marián Rynik
+        # Do not need to pass numb_test here as data object already knows it.
+        # Both DeepmdDataSystem and ClassArg parse the same json file
+        test_data = data.get_test()
         feed_dict_test = {}
         for kk in test_data.keys():
             if kk == 'find_type' or kk == 'type' :
@@ -466,9 +471,13 @@ def test_on_the_fly (self,
             if 'find_' in kk:
                 feed_dict_test[self.place_holders[kk]] = test_data[kk]
             else:
-                feed_dict_test[self.place_holders[kk]] = np.reshape(test_data[kk][:self.numb_test], [-1])
+                # ! altered by Marián Rynik
+                # again the data object knows appropriate test data shape,
+                # there is no need to slice again!
+                # feed_dict_test[self.place_holders[kk]] = np.reshape(test_data[kk][:self.numb_test[data.pick_idx]], [-1])
+                feed_dict_test[self.place_holders[kk]] = np.reshape(test_data[kk], [-1])
         for ii in ['type'] :
-            feed_dict_test[self.place_holders[ii]] = np.reshape(test_data[ii][:self.numb_test], [-1])
+            feed_dict_test[self.place_holders[ii]] = np.reshape(test_data[ii], [-1])
         for ii in ['natoms_vec', 'default_mesh'] :
             feed_dict_test[self.place_holders[ii]] = test_data[ii]
         feed_dict_test[self.place_holders['is_training']] = False
@@ -483,6 +492,4 @@ def test_on_the_fly (self,
                                    feed_dict_batch)
             print_str += " %8.1e\n" % current_lr
             fp.write(print_str)
-            fp.flush ()
-
-
+            fp.flush ()
\ No newline at end of file
diff --git a/source/train/common.py b/source/train/common.py
index 887669a278..f5092bbbe9 100644
--- a/source/train/common.py
+++ b/source/train/common.py
@@ -110,11 +110,17 @@ def add (self,
     def _add_single(self, key, data) :
         vtype = type(data)
         if not(vtype in self.arg_dict[key]['types']) :
-            # try the type convertion to the first listed type
-            try :
-                vv = (self.arg_dict[key]['types'][0])(data)
-            except TypeError:
-                raise TypeError ("cannot convert provided key \"%s\" to type %s " % (key, str(self.arg_dict[key]['types'][0])) )
+            # ! altered by Marián Rynik
+            # try the type conversion to one of the types
+            for tp in self.arg_dict[key]['types']:
+                try :
+                    vv = tp(data)
+                except TypeError:
+                    pass
+                else:
+                    break
+            else:
+                raise TypeError ("cannot convert provided key \"%s\" to type(s) %s " % (key, str(self.arg_dict[key]['types'])) )
         else :
             vv = data
         self.arg_dict[key]['value'] = vv

From af3867a4cceef3aa0b513a043cc4825d63eacbf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3=A1n=20Rynik?=
Date: Wed, 7 Oct 2020 22:55:14 +0200
Subject: [PATCH 2/7] fix failing test_get_test

---
 source/tests/test_deepmd_data_sys.py | 5 +++--
 source/train/DataSystem.py           | 9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/test_deepmd_data_sys.py
index d08b148f3a..c684de7c0c 100644
--- a/source/tests/test_deepmd_data_sys.py
+++ b/source/tests/test_deepmd_data_sys.py
@@ -83,7 +83,7 @@ def test_get_test(self):
         ds.add('test', self.test_ndof, atomic = True, must = True)
         ds.add('null', self.test_ndof, atomic = True, must = False)
         sys_idx = 0
-        data = ds.get_test(sys_idx=sys_idx)
+        data = ds.get_test(sys_idx=sys_idx, n_test=-1)
         self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
         self._in_array(np.load('sys_0/set.002/coord.npy'),
                        ds.get_sys(sys_idx).idx_map,
@@ -98,8 +98,9 @@ def test_get_test(self):
                                          - data['null']
                                      ), 0.0)
+
         sys_idx = 2
-        data = ds.get_test(sys_idx=sys_idx)
+        data = ds.get_test(sys_idx=sys_idx, n_test=-1)
         self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
         self._in_array(np.load('sys_2/set.002/coord.npy'),
                        ds.get_sys(sys_idx).idx_map,
diff --git a/source/train/DataSystem.py b/source/train/DataSystem.py
index 03971d196d..271d898774 100644
--- a/source/train/DataSystem.py
+++ b/source/train/DataSystem.py
@@ -232,20 +232,23 @@ def get_batch (self,
     # ! altered by Marián Rynik
     def get_test (self,
-                  sys_idx = None) :
+                  sys_idx = None,
+                  n_test = None) :

         # need to get idx first to get the appropriate test size for the
         # current system
         if sys_idx is not None :
             idx = sys_idx
         else :
-            # idx get selected in get batch, it is the index of a system
+            # idx get selected in get_batch method, it must be run first
+            # otherwise this will get messed-up
             idx = self.pick_idx

         if not hasattr(self, 'default_mesh') :
             self._make_default_mesh()
         if not hasattr(self, 'test_data') :
-            self._load_test(ntests = self.test_size[idx])
+            n_test = n_test if n_test is not None else self.test_size[idx]
+            self._load_test(ntests = n_test)

         test_system_data = {}
         for nn in self.test_data:

From 265c559beee40230d9981541cf991ad71cf0ab6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3%A1n=20Rynik?=
Date: Thu, 8 Oct 2020 10:35:48 +0200
Subject: [PATCH 3/7] some small alterations to better preserve the original
 logic of the code

---
 source/tests/test_deepmd_data_sys.py |  4 ++--
 source/train/DataSystem.py           | 23 ++++++++++++-----------
 source/train/Trainer.py              |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/test_deepmd_data_sys.py
index c684de7c0c..4cbf2af1b7 100644
--- a/source/tests/test_deepmd_data_sys.py
+++ b/source/tests/test_deepmd_data_sys.py
@@ -83,7 +83,7 @@ def test_get_test(self):
         ds.add('test', self.test_ndof, atomic = True, must = True)
         ds.add('null', self.test_ndof, atomic = True, must = False)
         sys_idx = 0
-        data = ds.get_test(sys_idx=sys_idx, n_test=-1)
+        data = ds.get_test(sys_idx=sys_idx)
         self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
         self._in_array(np.load('sys_0/set.002/coord.npy'),
                        ds.get_sys(sys_idx).idx_map,
@@ -100,7 +100,7 @@ def test_get_test(self):
                                      ), 0.0)

         sys_idx = 2
-        data = ds.get_test(sys_idx=sys_idx, n_test=-1)
+        data = ds.get_test(sys_idx=sys_idx)
         self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
         self._in_array(np.load('sys_2/set.002/coord.npy'),
                        ds.get_sys(sys_idx).idx_map,
diff --git a/source/train/DataSystem.py b/source/train/DataSystem.py
index 271d898774..5aa866d550 100644
--- a/source/train/DataSystem.py
+++ b/source/train/DataSystem.py
@@ -233,23 +233,17 @@ def get_batch (self,
     # ! altered by Marián Rynik
     def get_test (self,
                   sys_idx = None,
-                  n_test = None) :
+                  n_test = -1) :

-        # need to get idx first to get the appropriate test size for the
-        # current system
+        if not hasattr(self, 'default_mesh') :
+            self._make_default_mesh()
+        if not hasattr(self, 'test_data') :
+            self._load_test(ntests = n_test)
         if sys_idx is not None :
             idx = sys_idx
         else :
-            # idx get selected in get_batch method, it must be run first
-            # otherwise this will get messed-up
             idx = self.pick_idx

-        if not hasattr(self, 'default_mesh') :
-            self._make_default_mesh()
-        if not hasattr(self, 'test_data') :
-            n_test = n_test if n_test is not None else self.test_size[idx]
-            self._load_test(ntests = n_test)
-
         test_system_data = {}
         for nn in self.test_data:
             test_system_data[nn] = self.test_data[nn][idx]
@@ -257,6 +251,13 @@ def get_test (self,
         test_system_data["default_mesh"] = self.default_mesh[idx]
         return test_system_data

+    def get_sys_ntest(self, sys_idx=None):
+        """Get number of tests for the currently selected system,
+        or one defined by sys_idx."""
+        if sys_idx is not None :
+            return self.test_size[sys_idx]
+        else :
+            return self.test_size[self.pick_idx]

     def get_type_map(self):
         return self.type_map
diff --git a/source/train/Trainer.py b/source/train/Trainer.py
index b5beb18ef6..31a95346a9 100644
--- a/source/train/Trainer.py
+++ b/source/train/Trainer.py
@@ -463,7 +463,7 @@ def test_on_the_fly (self,
         # ! altered by Marián Rynik
         # Do not need to pass numb_test here as data object already knows it.
         # Both DeepmdDataSystem and ClassArg parse the same json file
-        test_data = data.get_test()
+        test_data = data.get_test(n_test=data.get_sys_ntest())
         feed_dict_test = {}
         for kk in test_data.keys():
             if kk == 'find_type' or kk == 'type' :

From 373b0aebfb22cd3ccf6da98f40cc9d3acaddaf7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3%A1n=20Rynik?=
Date: Thu, 8 Oct 2020 14:09:28 +0200
Subject: [PATCH 4/7] trigger new travis build

---
 source/train/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/train/common.py b/source/train/common.py
index f5092bbbe9..b4a8b42ce8 100644
--- a/source/train/common.py
+++ b/source/train/common.py
@@ -16,7 +16,7 @@
 # """
 #     cdf = 0.5 * (1.0 + tf.tanh((math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3)))))
 #     return x * cdf
-def gelu(x) :
+def gelu(x):
     return op_module.gelu(x)

 data_requirement = {}

From 44b889c1b7fe54dafd6d54bd459ef41459185ad9 Mon Sep 17 00:00:00 2001
From: marian-code
Date: Fri, 16 Oct 2020 16:30:56 +0200
Subject: [PATCH 5/7] resolve requested changes

clear the confusion caused by adding python style comments to json file
---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index abf3fcc35a..c16fe7dd8c 100644
--- a/README.md
+++ b/README.md
@@ -401,7 +401,8 @@ An example of `training` is
     "systems": ["../data1/", "../data2/"],
     "set_prefix": "set",
     "stop_batch": 1000000,
-    "batch_size": 1, # or "auto" or [10, 20]
+    "_comment": " batch_size can be supplied with, e.g. 1, or auto (string) or [10, 20]"
+    "batch_size": 1,

     "seed": 1,
@@ -410,7 +410,8 @@ An example of `training` is
     "_comment": " frequencies counted in batch",
     "disp_file": "lcurve.out",
     "disp_freq": 100,
-    "numb_test": 10, # or "XX%" or [10, 20]
+    "_comment": " numb_test can be supplied with, e.g. 1, or XX% (string) or [10, 20]"
+    "numb_test": 10,
     "save_freq": 1000,
     "save_ckpt": "model.ckpt",
     "load_ckpt": "model.ckpt",

From 289e532865e8f03e6f5a2bdab4478d97cfd74e9d Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Mon, 19 Oct 2020 10:11:39 +0800
Subject: [PATCH 6/7] Update README.md

Co-authored-by: Jinzhe Zeng
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c16fe7dd8c..b23f76f75d 100644
--- a/README.md
+++ b/README.md
@@ -401,7 +401,7 @@ An example of `training` is
     "systems": ["../data1/", "../data2/"],
     "set_prefix": "set",
     "stop_batch": 1000000,
-    "_comment": " batch_size can be supplied with, e.g. 1, or auto (string) or [10, 20]"
+    "_comment": " batch_size can be supplied with, e.g. 1, or auto (string) or [10, 20]",
     "batch_size": 1,

     "seed": 1,

From b7c523cacb50a5265bed1154c48e49b0349ead2c Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Mon, 19 Oct 2020 10:11:52 +0800
Subject: [PATCH 7/7] Update README.md

Co-authored-by: Jinzhe Zeng
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b23f76f75d..23cf621471 100644
--- a/README.md
+++ b/README.md
@@ -410,7 +410,7 @@ An example of `training` is
     "_comment": " frequencies counted in batch",
     "disp_file": "lcurve.out",
     "disp_freq": 100,
-    "_comment": " numb_test can be supplied with, e.g. 1, or XX% (string) or [10, 20]"
+    "_comment": " numb_test can be supplied with, e.g. 1, or XX% (string) or [10, 20]",
     "numb_test": 10,
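
A minimal standalone sketch of the per-system test-size logic introduced by this patch series. The function name `resolve_test_size`, its signature, and the example frame counts are illustrative only and are not part of the DeePMD-kit API; in the patches themselves the equivalent logic lives in `DeepmdDataSystem.__init__` and `_make_auto_ts` in `source/train/DataSystem.py`, where the number of frames per system is taken as `batch_size[ii] * nbatches[ii]`. With this in place, `"numb_test"` in the json can be an integer, a percentage string such as `"10%"`, or a list with one entry per system.

```python
# Illustrative sketch only: resolve_test_size and nframes_per_system are
# made-up names, not DeePMD-kit API.
def resolve_test_size(test_size, nframes_per_system):
    """Turn a numb_test setting (int, "XX%" string, or list) into one test count per system."""
    nsystems = len(nframes_per_system)
    if isinstance(test_size, int):
        # the same number of test frames for every system
        return [test_size] * nsystems
    elif isinstance(test_size, str):
        # e.g. "10%" -> use 10% of each system's frames for its testing
        try:
            percent = int(test_size.split('%')[0])
        except ValueError:
            raise RuntimeError('unknown test_size rule ' + test_size)
        return [int(n * percent / 100) for n in nframes_per_system]
    elif isinstance(test_size, list):
        # one entry per system, in the same order as the "systems" key
        assert len(test_size) == nsystems
        return test_size
    else:
        raise RuntimeError('invalid test_size')

# three hypothetical systems with 300, 1000 and 50 frames
print(resolve_test_size(10, [300, 1000, 50]))           # [10, 10, 10]
print(resolve_test_size("10%", [300, 1000, 50]))        # [30, 100, 5]
print(resolve_test_size([10, 20, 5], [300, 1000, 50]))  # [10, 20, 5]
```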