Skip to content

Commit

Permalink
Update ABACUS example files for DeePMD-kit 2.x and the new version of dpdispatcher.
Browse files: browse the repository at this point in the history
  • Loading branch information
LiuRenxi authored and LiuRenxi committed Apr 24, 2022
1 parent 16b3b6a commit 4e993a5
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 193 deletions.
147 changes: 67 additions & 80 deletions examples/run/dp1.x-lammps-ABACUS-lcao-dpks/methane/machine.json
Original file line number Diff line number Diff line change
@@ -1,88 +1,75 @@
{
"train":[
{
"machine":{
"machine_type": "slurm",
"host_name": "localhost",
"port": 22,
"username": "mhchen_cls",
"work_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/5_dpgen_examples/dpgen-example/run_abacus_fake_dpks"
"api_version": "1.0",
"train": [
{
"command": "dp",
"machine": {
"batch_type": "PBS",
"context_type": "SSHContext",
"local_root": "./",
"remote_root": "/home/user1234/work_path_dpdispatcher_test",
"remote_profile": {
"hostname": "39.xxx.xx.xx",
"username": "user1234"
}
},

"resources":{
"numb_node": 1,
"numb_gpu": 1,
"task_per_node": 1,
"partition": "gpu_4l",
"exclude_list": [],
"mem_limit": 0,
"source_list": ["/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/env.sh"],
"module_list": [],
"time_limit": "23:0:0",
"account":"mhchen_g1",
"qos": "mhcheng4c"
},
"python_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_dpgen_interface/4_deepmd-kit/dpmd_install/bin/python"
}
"resources": {
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "T4_4_15",
"group_size": 1,
"custom_flags":["#SBATCH --mem=32G"],
"strategy": {"if_cuda_multi_devices": true},
"para_deg": 3,
"source_list": ["/home/user1234/deepmd.1.2.4.env"]
}
}
],
"model_devi": [
{
"machine": {
"machine_type": "slurm",
"hostname": "localhost",
"port": 22,
"username": "mhchen_cls",
"work_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/5_dpgen_examples/dpgen-example/run_abacus_fake_dpks"
},
"resources": {
"num_node": 1,
"num_gpu": 1,
"task_per_node": 1,
"partition": "gpu_4l",
"exclude_list": [],
"mem_limit": 0,
"source_list": ["/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/env.sh"],
"module_list": [],
"time_limit": "14:00:00",
"account": "mhchen_g1",
"qos": "mhcheng4c",
"allow_failure":true
},
"model_devi":[
{
"command": "lmp",
"group_size": 6
}
],
"fp": [
{
"machine":{
"machine_type": "slurm",
"hostname": "localhost",
"port": 22,
"username": "mhchen_cls",
"work_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/5_dpgen_examples/dpgen-example/run_abacus_fake_dpks"
"batch_type": "PBS",
"context_type": "SSHContext",
"local_root": "./",
"remote_root": "/home/user1234/work_path_dpdispatcher_test",
"remote_profile": {
"hostname": "39.xxx.xx.xx",
"username": "user1234"
}
},
"resources": {
"cvasp": false,
"numb_node": 1,
"partition": "cn-large",
"task_per_node": 4,

"numb_gpu": 0,
"exclude_list": [],
"with_mpi": false,
"mem_limit": 0,
"source_list":[
"/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/ABACUS/source_intel.sh"
],
"module_list":[],
"time_limit": "48:0:0",
"account": "mhchen_cg2",
"qos": "mhchenq",
"_comment": "that's all"
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "T4_4_15",
"group_size": 5,
"source_list": ["/home/user1234/deepmd.1.2.4.env"]
}
}
],
"fp":[
{
"command": "ABACUS.mpi",
"machine":{
"batch_type": "PBS",
"context_type": "SSHContext",
"local_root": "./",
"remote_root": "/home/user1234/work_path_dpdispatcher_test",
"remote_profile": {
"hostname": "39.xxx.xx.xx",
"username": "user1234"
}
},
"command": "mpirun -np 4 ABACUS.mpi",
"group_size": 150
}

]
}
"resources": {
"number_node": 1,
"cpu_per_node": 32,
"gpu_per_node": 0,
"queue_name": "G_32_128",
"group_size": 1,
"source_list": ["~/abacus.env"]
}
}
]
}
29 changes: 13 additions & 16 deletions examples/run/dp1.x-lammps-ABACUS-lcao-dpks/methane/param.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,19 @@
"decay_rate": 0.95
},
"training": {
"systems": [],
"set_prefix": "set",
"stop_batch": 3000,
"batch_size": 1,
"seed": 1,
"_comment": "frequencies counted in batch",
"disp_file": "lcurve.out",
"disp_freq": 500,
"numb_test": 4,
"save_freq": 1000,
"save_ckpt": "model.ckpt",
"load_ckpt": "model.ckpt",
"disp_training": true,
"time_training": true,
"profiling": false,
"profiling_file": "timeline.json"
"set_prefix": "set",
"numb_steps": 2000,
"batch_size": 1,
"disp_file": "lcurve.out",
"disp_freq": 1000,
"numb_test": 4,
"save_freq": 1000,
"save_ckpt": "model.ckpt",
"disp_training": true,
"time_training": true,
"profiling": false,
"profiling_file": "timeline.json",
"_comment": "that's all"
}
},

Expand Down
149 changes: 68 additions & 81 deletions examples/run/dp1.x-lammps-ABACUS-pw/methane/machine.json
Original file line number Diff line number Diff line change
@@ -1,88 +1,75 @@
{
"train":[
{
"machine":{
"machine_type": "slurm",
"host_name": "localhost",
"port": 22,
"username": "",
"work_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/5_dpgen_examples/dpgen-example/run_abacus"
"api_version": "1.0",
"train": [
{
"command": "dp",
"machine": {
"batch_type": "PBS",
"context_type": "SSHContext",
"local_root": "./",
"remote_root": "/home/user1234/work_path_dpdispatcher_test",
"remote_profile": {
"hostname": "39.xxx.xx.xx",
"username": "user1234"
}
},

"resources":{
"numb_node": 1,
"numb_gpu": 1,
"task_per_node": 1,
"partition": "",
"exclude_list": [],
"mem_limit": 0,
"source_list": ["/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/env.sh"],
"module_list": [],
"time_limit": "23:0:0",
"account":"",
"qos": ""
},
"python_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_dpgen_interface/4_deepmd-kit/dpmd_install/bin/python"
}
"resources": {
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "T4_4_15",
"group_size": 1,
"custom_flags":["#SBATCH --mem=32G"],
"strategy": {"if_cuda_multi_devices": true},
"para_deg": 3,
"source_list": ["/home/user1234/deepmd.1.2.4.env"]
}
}
],
"model_devi": [
{
"machine": {
"machine_type": "slurm",
"hostname": "localhost",
"port": 22,
"username": "",
"work_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/5_dpgen_examples/dpgen-example/run_abacus"
},
"resources": {
"num_node": 1,
"num_gpu": 0,
"task_per_node": 2,
"partition": "cn-large",
"exclude_list": [],
"mem_limit": 0,
"source_list": ["/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/env.sh"],
"module_list": [],
"time_limit": "14:00:00",
"account": "",
"qos": "",
"allow_failure":true
},
"command": "mpirun -n 2 lmp",
"group_size": 6
}
],
"fp": [
{
"model_devi":[
{
"command": "lmp",
"machine":{
"machine_type": "slurm",
"hostname": "localhost",
"port": 22,
"username": "",
"work_path": "/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/5_dpgen_examples/dpgen-example/run_abacus"
"batch_type": "PBS",
"context_type": "SSHContext",
"local_root": "./",
"remote_root": "/home/user1234/work_path_dpdispatcher_test",
"remote_profile": {
"hostname": "39.xxx.xx.xx",
"username": "user1234"
}
},
"resources": {
"cvasp": false,
"numb_node": 1,
"partition": "cn-large",
"task_per_node": 4,

"numb_gpu": 0,
"exclude_list": [],
"with_mpi": false,
"mem_limit": 0,
"source_list":[
"/home/mhchen_pkuhpc/mhchen_cls/lustre2/5_liurenxi/5_ABACUS_dpgen_interface/env.sh"
],
"module_list":[],
"time_limit": "48:0:0",
"account": "",
"qos": "",
"_comment": "that's all"
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "T4_4_15",
"group_size": 5,
"source_list": ["/home/user1234/deepmd.1.2.4.env"]
}
}
],
"fp":[
{
"command": "ABACUS.mpi",
"machine":{
"batch_type": "PBS",
"context_type": "SSHContext",
"local_root": "./",
"remote_root": "/home/user1234/work_path_dpdispatcher_test",
"remote_profile": {
"hostname": "39.xxx.xx.xx",
"username": "user1234"
}
},
"command": "mpirun -np 4 ABACUS.mpi.2.1.0",
"group_size": 150
}

]
}
"resources": {
"number_node": 1,
"cpu_per_node": 32,
"gpu_per_node": 0,
"queue_name": "G_32_128",
"group_size": 1,
"source_list": ["~/abacus.env"]
}
}
]
}
29 changes: 13 additions & 16 deletions examples/run/dp1.x-lammps-ABACUS-pw/methane/param.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,19 @@
"decay_rate": 0.95
},
"training": {
"systems": [],
"set_prefix": "set",
"stop_batch": 36000,
"batch_size": 1,
"seed": 1,
"_comment": "frequencies counted in batch",
"disp_file": "lcurve.out",
"disp_freq": 1000,
"numb_test": 4,
"save_freq": 1000,
"save_ckpt": "model.ckpt",
"load_ckpt": "model.ckpt",
"disp_training": true,
"time_training": true,
"profiling": false,
"profiling_file": "timeline.json"
"set_prefix": "set",
"numb_steps": 2000,
"batch_size": 1,
"disp_file": "lcurve.out",
"disp_freq": 1000,
"numb_test": 4,
"save_freq": 1000,
"save_ckpt": "model.ckpt",
"disp_training": true,
"time_training": true,
"profiling": false,
"profiling_file": "timeline.json",
"_comment": "that's all"
}
},

Expand Down

0 comments on commit 4e993a5

Please sign in to comment.