Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Large energy and force Errors when using Atom type embedding (test for deepmd-2.0beta) #678

Closed
Manyi-Yang opened this issue May 30, 2021 · 2 comments
Labels

Comments

@Manyi-Yang
Copy link

Manyi-Yang commented May 30, 2021

>Summary

The energy and force errors are large when training with atom type embedding.

>DeePMD-kit version, input.json, lcurve.out

2.0.0.b0, conda

>input.json

 {
    "model": {
        "type_map": [
            "S"
        ],

	"type_embedding":{
	    "neuron":		[2, 4, 8],
	    "resnet_dt":	false,
	    "seed":		1
	},	    
        "descriptor": {
            "type": "se_a_tpe",
            "sel": [
                128
            ],
            "rcut_smth": 0.5,
            "rcut": 6.5,
            "neuron": [
                25,
                50,
                100
            ],
            "resnet_dt": false,
            "axis_neuron": 16,
            "seed": 722586222
        },
        
        "fitting_net": {
            "neuron": [
                240,
                240,
                240,
                240
            ],
            "resnet_dt": true,
            "seed": 711230366
        }
    },
    "learning_rate": {
        "type": "exp",
        "start_lr": 0.001,
        "decay_steps": 5000,
        "stop_lr": 3.51e-8
    },
    "loss": {
	"type":"ener",
        "start_pref_e": 0.02,
        "limit_pref_e": 1,
        "start_pref_f": 1000,
        "limit_pref_f": 1,
        "start_pref_v": 0.0,
        "limit_pref_v": 0.0
    },
    "training": {

	"seed":		10,
	"disp_file":	"lcurve.out",
        "stop_batch": 1000000,
        "disp_freq": 1000,
        "numb_test": 5,
        "save_freq": 1000,
        "save_ckpt": "model.ckpt",
        "profiling": false,
        "profiling_file": "timeline.json",
        "training_data": {
             "systems": ["../data.iters/iter.000000/02.fp/data.002",
                 "../data.iters/iter.000000/02.fp/data.032",
                 "../data.iters/iter.000000/02.fp/data.000",
                 "../data.iters/iter.000000/02.fp/data.034",
                 "../data.iters/iter.000001/02.fp/data.004",
                 "../data.iters/iter.000001/02.fp/data.036",
                 "../data.iters/iter.000001/02.fp/data.006",
                 "../data.iters/iter.000001/02.fp/data.038",
                 "../data.iters/iter.000002/02.fp/data.016",
                 "../data.iters/iter.000002/02.fp/data.048",
                 "../data.iters/iter.000002/02.fp/data.018",
                 "../data.iters/iter.000002/02.fp/data.050",
                 "../data.iters/iter.000003/02.fp/data.022",
                 "../data.iters/iter.000003/02.fp/data.054",
                 "../data.iters/iter.000003/02.fp/data.052",
                 "../data.iters/iter.000003/02.fp/data.020",
                 "../data.iters/iter.000004/02.fp/data.058",
                 "../data.iters/iter.000004/02.fp/data.024",
                 "../data.iters/iter.000004/02.fp/data.026",
                 "../data.iters/iter.000004/02.fp/data.056",
                 "../data.iters/iter.000005/02.fp/data.060",
                 "../data.iters/iter.000005/02.fp/data.028",
                 "../data.iters/iter.000005/02.fp/data.030",
                 "../data.iters/iter.000005/02.fp/data.062",
                 "../data.iters/iter.000006/02.fp/data.010",
                 "../data.iters/iter.000006/02.fp/data.008",
                 "../data.iters/iter.000006/02.fp/data.042",
                 "../data.iters/iter.000006/02.fp/data.040",
                 "../data.iters/iter.000007/02.fp/data.044",
                 "../data.iters/iter.000007/02.fp/data.014",
                 "../data.iters/iter.000007/02.fp/data.046",
                 "../data.iters/iter.000007/02.fp/data.012"],
             "batch_size": 1
	},
	     "validation_data":{
             "systems": ["../data.iters/iter.000000/02.fp/data.002",
                 "../data.iters/iter.000000/02.fp/data.032",
                 "../data.iters/iter.000000/02.fp/data.000",
                 "../data.iters/iter.000000/02.fp/data.034",
                 "../data.iters/iter.000001/02.fp/data.004",
                 "../data.iters/iter.000001/02.fp/data.036",
                 "../data.iters/iter.000001/02.fp/data.006",
                 "../data.iters/iter.000001/02.fp/data.038",
                 "../data.iters/iter.000002/02.fp/data.016",
                 "../data.iters/iter.000002/02.fp/data.048",
                 "../data.iters/iter.000002/02.fp/data.018",
                 "../data.iters/iter.000002/02.fp/data.050",
                 "../data.iters/iter.000003/02.fp/data.022",
                 "../data.iters/iter.000003/02.fp/data.054",
                 "../data.iters/iter.000003/02.fp/data.052",
                 "../data.iters/iter.000003/02.fp/data.020",
                 "../data.iters/iter.000004/02.fp/data.058",
                 "../data.iters/iter.000004/02.fp/data.024",
                 "../data.iters/iter.000004/02.fp/data.026",
                 "../data.iters/iter.000004/02.fp/data.056",
                 "../data.iters/iter.000005/02.fp/data.060",
                 "../data.iters/iter.000005/02.fp/data.028",
                 "../data.iters/iter.000005/02.fp/data.030",
                 "../data.iters/iter.000005/02.fp/data.062",
                 "../data.iters/iter.000006/02.fp/data.010",
                 "../data.iters/iter.000006/02.fp/data.008",
                 "../data.iters/iter.000006/02.fp/data.042",
                 "../data.iters/iter.000006/02.fp/data.040",
                 "../data.iters/iter.000007/02.fp/data.044",
                 "../data.iters/iter.000007/02.fp/data.014",
                 "../data.iters/iter.000007/02.fp/data.046",
                 "../data.iters/iter.000007/02.fp/data.012"],
	    "batch_size":	1,
	    "numb_btch":	3,
	    "_comment":		"that's all"
	},
	"_comment":	"that's all"
    }
}

lcurve.out

step rmse_val rmse_trn rmse_e_val rmse_e_trn rmse_f_val rmse_f_trn lr
0 4.64e+02 4.64e+02 2.90e+02 2.90e+02 9.26e-01 8.04e-01 1.0e-03
1000 2.75e+01 3.01e+01 4.40e-02 4.74e-02 8.69e-01 9.51e-01 1.0e-03
2000 2.86e+01 3.14e+01 2.69e-02 4.51e-02 9.03e-01 9.93e-01 1.0e-03
..................
163000 1.29e+01 1.28e+01 1.86e-02 2.95e-02 9.28e-01 9.14e-01 1.9e-04
164000 1.27e+01 1.42e+01 4.62e-02 4.76e-02 9.07e-01 1.02e+00 1.9e-04
165000 1.33e+01 1.14e+01 2.72e-02 1.08e-02 9.79e-01 8.40e-01 1.8e-04
166000 1.16e+01 1.25e+01 9.23e-02 7.24e-02 8.52e-01 9.21e-01 1.8e-04
167000 1.23e+01 1.14e+01 2.35e-02 6.55e-02 9.04e-01 8.38e-01 1.8e-04
168000 1.20e+01 1.41e+01 4.60e-02 1.97e-02 8.82e-01 1.04e+00 1.8e-04
169000 1.17e+01 1.18e+01 2.20e-02 3.75e-02 8.63e-01 8.65e-01 1.8e-04
170000 1.13e+01 1.00e+01 3.51e-02 9.29e-02 8.55e-01 7.52e-01 1.7e-04
171000 1.22e+01 1.06e+01 3.65e-02 1.23e-02 9.17e-01 7.98e-01 1.7e-04
172000 1.18e+01 1.10e+01 5.21e-02 2.40e-02 8.92e-01 8.29e-01 1.7e-04
173000 1.17e+01 1.13e+01 5.59e-02 2.00e-02 8.83e-01 8.54e-01 1.7e-04
174000 1.24e+01 1.10e+01 7.28e-02 7.33e-02 9.30e-01 8.31e-01 1.7e-04
175000 1.14e+01 1.25e+01 7.05e-02 5.81e-02 8.83e-01 9.64e-01 1.7e-04
176000 1.07e+01 1.23e+01 5.51e-02 2.85e-02 8.24e-01 9.48e-01 1.7e-04
177000 1.13e+01 1.18e+01 7.20e-02 4.19e-02 8.71e-01 9.11e-01 1.7e-04
178000 1.12e+01 1.11e+01 5.49e-02 1.91e-02 8.63e-01 8.60e-01 1.7e-04
179000 1.20e+01 1.04e+01 4.85e-02 3.96e-02 9.28e-01 8.04e-01 1.7e-04
180000 1.19e+01 1.19e+01 6.74e-02 1.78e-02 9.37e-01 9.48e-01 1.6e-04
181000 1.15e+01 1.17e+01 8.61e-02 1.21e-03 9.07e-01 9.26e-01 1.6e-04
182000 1.13e+01 1.04e+01 3.48e-02 1.01e-02 8.94e-01 8.22e-01 1.6e-04
183000 1.05e+01 1.10e+01 3.94e-02 6.62e-02 8.35e-01 8.75e-01 1.6e-04
184000 1.16e+01 1.14e+01 2.73e-02 8.72e-02 9.24e-01 9.01e-01 1.6e-04

@amcadmus
Copy link
Member

amcadmus commented May 31, 2021

Could you please follow the example for type embedding, paying particular attention to the type of the descriptor?
https://github.com/deepmodeling/deepmd-kit/blob/devel/examples/water/se_e2_a_tebd/input.json

For the case of water, one writes:

	"type_embedding":{
	    "neuron":		[2, 4, 8],
	    "resnet_dt":	false,
	    "seed":		1
	},	    
	"descriptor" :{
	    "type":		"se_e2_a",
	    "sel":		[46, 92],
	    "rcut_smth":	0.50,
	    "rcut":		6.00,
	    "neuron":		[25, 50, 100],
	    "resnet_dt":	false,
	    "axis_neuron":	16,
	    "type_one_side":	true,
	    "seed":		1,
	    "_comment":		" that's all"
	},

The type_embedding section will automatically be used by a descriptor of type "se_e2_a", which is equivalent to the old "se_a" type.

@njzjz
Copy link
Member

njzjz commented Mar 24, 2022

Hi Manyi, could you patch #1592 and see if it works for you?

@njzjz njzjz closed this as completed Aug 19, 2022
Repository owner moved this from Todo to Done in Bugfixes for DeePMD-kit Aug 19, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
Archived in project
Development

No branches or pull requests

3 participants