Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add tests to check machine files #874

Merged
merged 1 commit into from
Aug 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion dpgen/arginfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ def general_mdata_arginfo(name: str, tasks: Tuple[str]) -> Argument:
"""

doc_api_version = "Please set to 1.0"
doc_deepmd_version = "DeePMD-kit version, e.g. 2.1.3"
doc_run_mdata = "machine.json file"
arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version)
arg_deepmd_version = Argument(
"deepmd_version", str, optional=True, default="2", doc=doc_deepmd_version)

sub_fields = [arg_api_version]
sub_fields = [arg_api_version, arg_deepmd_version]
doc_mdata = "Parameters of command, machine, and resources for %s"
for task in tasks:
sub_fields.append(Argument(
Expand Down
7 changes: 6 additions & 1 deletion dpgen/dispatcher/Dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,15 +397,20 @@ def mdata_arginfo() -> List[Argument]:
arginfo
"""
doc_command = "Command of a program."
doc_mdata = "Machine and resources parameters"
doc_user_forward_files = "Files to be forwarded to the remote machine."
doc_user_backward_files = "Files to be backwarded from the remote machine."
command_arginfo = Argument("command", str, optional=False, doc=doc_command)
machine_arginfo = Machine.arginfo()
machine_arginfo.name = "machine"
resources_arginfo = Resources.arginfo()
resources_arginfo.name = "resources"
user_forward_files_arginfo = Argument("user_forward_files", list, optional=True, doc=doc_user_forward_files)
user_backward_files_arginfo = Argument("user_backward_files", list, optional=True, doc=doc_user_backward_files)

return [
command_arginfo, machine_arginfo, resources_arginfo,
user_forward_files_arginfo,
user_backward_files_arginfo,
]


Expand Down
16 changes: 8 additions & 8 deletions examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"api_version": "1.0",
"train": [
"train":
{
"command": "dp",
"machine": {
Expand All @@ -14,6 +14,7 @@
}
},
"resources": {
"batch_type": "PBS",
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
Expand All @@ -24,9 +25,8 @@
"para_deg": 3,
"source_list": ["/home/fengbo/deepmd.1.2.4.env"]
}
}
],
"model_devi":[
},
"model_devi":
{
"command": "lmp",
"machine":{
Expand All @@ -40,6 +40,7 @@
}
},
"resources": {
"batch_type": "PBS",
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
Expand All @@ -51,9 +52,8 @@
"user_forward_files" : [],
"_comments" : "In user_backward_files, define output files to be collected.",
"user_backward_files" : ["HILLS"]
}
],
"fp":[
},
"fp":
{
"command": "vasp_std",
"machine":{
Expand All @@ -67,6 +67,7 @@
}
},
"resources": {
"batch_type": "PBS",
"number_node": 1,
"cpu_per_node": 32,
"gpu_per_node": 0,
Expand All @@ -79,5 +80,4 @@
"_comments" : "In user_backward_files, define output files to be collected.",
"user_backward_files" : []
}
]
}
23 changes: 14 additions & 9 deletions examples/CH4-refact-dpdispatcher/machine-dpcloudserver.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
{
"api_version": "1.0",
"train" :[
"train" :
{
"command": "dp",
"machine": {
"batch_type": "DpCloudServer",
"context_type": "DpCloudServerContext",
"local_root" : "./",
"remote_profile":{
"username": "yfb222333",
"email": "yfb222333",
"password": "",
"program_id": 0,
"input_data":{
"job_type": "indicate",
"log_file": "dp_cloud_server.log",
Expand All @@ -33,23 +34,25 @@
}
},
"resources": {
"batch_type": "DpCloudServer",
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "GPU",
"group_size": 1
}
}],
},
"model_devi":
[{
{
"command": "lmp -i input.lammps -v restart 0",
"machine": {
"batch_type": "DpCloudServer",
"context_type": "DpCloudServerContext",
"local_root" : "./",
"remote_profile":{
"username": "yfb222333",
"email": "yfb222333",
"password": "",
"program_id": 0,
"input_data":{
"job_type": "indicate",
"log_file": "dp_cloud_server.log",
Expand All @@ -73,23 +76,25 @@
}
},
"resources": {
"batch_type": "DpCloudServer",
"number_node": 1,
"cpu_per_node": 4,
"gpu_per_node": 1,
"queue_name": "GPU",
"group_size": 5
}
}],
},
"fp":
[{
{
"command": "mpirun -n 16 vasp_std",
"machine": {
"batch_type": "DpCloudServer",
"context_type": "DpCloudServerContext",
"local_root" : "./",
"remote_profile":{
"username": "yfb222333",
"email": "yfb222333",
"password": "",
"program_id": 0,
"input_data":{
"job_type": "indicate",
"log_file": "dp_cloud_server.log",
Expand All @@ -112,6 +117,7 @@
}
},
"resources": {
"batch_type": "DpCloudServer",
"number_node": 1,
"cpu_per_node": 32,
"gpu_per_node": 0,
Expand All @@ -122,5 +128,4 @@
]
}
}
]
}
29 changes: 13 additions & 16 deletions examples/run/dp2.x-lammps-ABACUS-lcao/fcc-al/machine.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"api_version": "1.0",
"deepmd_version": "2.0.1",
"train" :[
"train" :
{
"command": "dp",
"machine": {
Expand All @@ -11,7 +11,7 @@
"remote_profile":{
"email": "",
"password": "",
"program_id": ,
"program_id": 0,
"keep_backup":true,
"input_data":{
"job_type": "indicate",
Expand All @@ -25,18 +25,17 @@
"platform": "",
"region":"",
"image_name":"",
"on_demand":0,
"job_type":""
"on_demand":0
}
}
},
"resources": {
"local_root":"./",
"batch_type": "Lebesgue",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why delete "local_root" and add "batch_type"? Shouldn't "batch_type" be added in the "machine" block?

Copy link
Member Author

@njzjz njzjz Aug 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see why you added local_root. It's never a parameter of resources.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

batch_type is used for checking as different types have different parameters.

Copy link
Contributor

@hongriTianqi hongriTianqi Aug 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see why you added local_root. It's never a parameter of resources.

This machine.json file was obtained from dpgen users at AISI; local_root was set in it from the very beginning. I will check with them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

batch_type is used for checking as different types have different parameters.

batch_type has already been set in the machine block; is it necessary to repeat this information in the resources block?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@y1xiaoc do you think it is possible to use an argument in another block as the variant key?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@y1xiaoc do you think it is possible to use an argument in another block as the variant key?

In principle yes, but I may need to redesign the current dargs code structure. Do these two keys always have to be the same? Currently, one way to enforce that is to set the extra_check parameter on their common parent argument.

"group_size": 1
}
}],
},
"model_devi":
[{
{
"command": "lmp -i input.lammps -v restart 0",
"machine": {
"batch_type": "Lebesgue",
Expand All @@ -45,7 +44,7 @@
"remote_profile":{
"email": "",
"password": "",
"program_id": ,
"program_id": 0,
"keep_backup":true,
"input_data":{
"job_type": "indicate",
Expand All @@ -59,18 +58,17 @@
"image_name":"",
"checkpoint_files": "sync_files",
"checkpoint_time":30,
"on_demand":0,
"job_type":""
"on_demand":0
}
}
},
"resources": {
"local_root":"./",
"batch_type": "Lebesgue",
"group_size": 50
}
}],
},
"fp":
[{
{
"command": "OMP_NUM_THREADS=1 mpirun -n 8 abacus",
"machine": {
"batch_type": "Lebesgue",
Expand All @@ -79,7 +77,7 @@
"remote_profile":{
"email": "",
"password": "",
"program_id": ,
"program_id": 0,
"keep_backup":true,
"input_data":{
"job_type": "indicate",
Expand All @@ -97,10 +95,9 @@
}
},
"resources": {
"batch_type": "Lebesgue",
"group_size": 50,
"local_root":"./",
"source_list": [""]
}
}
]
}
29 changes: 13 additions & 16 deletions examples/run/dp2.x-lammps-ABACUS-pw/fcc-al/machine.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"api_version": "1.0",
"deepmd_version": "2.0.1",
"train" :[
"train" :
{
"command": "dp",
"machine": {
Expand All @@ -11,7 +11,7 @@
"remote_profile":{
"email": "",
"password": "",
"program_id": ,
"program_id": 0,
"keep_backup":true,
"input_data":{
"job_type": "indicate",
Expand All @@ -25,18 +25,17 @@
"platform": "",
"region":"",
"image_name":"",
"on_demand":0,
"job_type":""
"on_demand":0
}
}
},
"resources": {
"local_root":"./",
"batch_type": "Lebesgue",
"group_size": 1
}
}],
},
"model_devi":
[{
{
"command": "lmp -i input.lammps -v restart 0",
"machine": {
"batch_type": "Lebesgue",
Expand All @@ -45,7 +44,7 @@
"remote_profile":{
"email": "",
"password": "",
"program_id": ,
"program_id": 0,
"keep_backup":true,
"input_data":{
"job_type": "indicate",
Expand All @@ -59,18 +58,17 @@
"image_name":"",
"checkpoint_files": "sync_files",
"checkpoint_time":30,
"on_demand":0,
"job_type":""
"on_demand":0
}
}
},
"resources": {
"local_root":"./",
"batch_type": "Lebesgue",
"group_size": 50
}
}],
},
"fp":
[{
{
"command": "OMP_NUM_THREADS=1 mpirun -n 8 abacus",
"machine": {
"batch_type": "Lebesgue",
Expand All @@ -79,7 +77,7 @@
"remote_profile":{
"email": "",
"password": "",
"program_id": ,
"program_id": 0,
"keep_backup":true,
"input_data":{
"job_type": "indicate",
Expand All @@ -97,10 +95,9 @@
}
},
"resources": {
"batch_type": "Lebesgue",
"group_size": 50,
"local_root":"./",
"source_list": [""]
}
}
]
}
Loading