Skip to content

Commit

Permalink
Merge branch 'master' into refactor-finetune
Browse files Browse the repository at this point in the history
  • Loading branch information
zjgemi committed Jul 31, 2024
2 parents 3a45d4c + f5c5d95 commit fdefc70
Show file tree
Hide file tree
Showing 22 changed files with 2,162 additions and 13 deletions.
1 change: 0 additions & 1 deletion .git_archival.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
node: $Format:%H$
node-date: $Format:%cI$
describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$
ref-names: $Format:%D$
6 changes: 3 additions & 3 deletions .github/workflows/pub-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ jobs:
uses: actions/checkout@v4

- name: Log in to Docker Hub
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Log in to the Container registry
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
with:
registry: ghcr.io
username: ${{ github.actor }}
Expand All @@ -46,7 +46,7 @@ jobs:
ghcr.io/deepmodeling/dpgen2
- name: Build and push Docker images
uses: docker/build-push-action@c382f710d39a5bb4e430307530a720f50c2d3318
uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445
with:
context: .
push: true
Expand Down
16 changes: 16 additions & 0 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def dp_dist_train_args():
doc_template_script = "File names of the template training script. It can be a `List[str]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `str`, the models share the same template training script. "
dock_student_model_path = "The path of student model"
doc_student_model_uri = "The URI of student model"
doc_optional_files = "Optional files for training"

return [
Argument(
Expand All @@ -67,6 +68,13 @@ def dp_dist_train_args():
default=None,
doc=doc_student_model_uri,
),
Argument(
"optional_files",
list,
optional=True,
default=None,
doc=doc_optional_files,
),
]


Expand All @@ -76,6 +84,7 @@ def dp_train_args():
doc_template_script = "File names of the template training script. It can be a `List[str]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `str`, the models share the same template training script. "
doc_init_models_paths = "the paths to initial models"
doc_init_models_uri = "The URI of initial models"
doc_optional_files = "Optional files for training"

return [
Argument(
Expand Down Expand Up @@ -105,6 +114,13 @@ def dp_train_args():
default=None,
doc=doc_init_models_uri,
),
Argument(
"optional_files",
list,
optional=True,
default=None,
doc=doc_optional_files,
),
]


Expand Down
4 changes: 4 additions & 0 deletions dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def make_concurrent_learning_op(
cl_step_config: dict = default_config,
upload_python_packages: Optional[List[os.PathLike]] = None,
valid_data: Optional[S3Artifact] = None,
train_optional_files: Optional[List[str]] = None,
):
if train_style in ("dp", "dp-dist"):
prep_run_train_op = PrepRunDPTrain(
Expand All @@ -165,6 +166,7 @@ def make_concurrent_learning_op(
run_config=run_train_config,
upload_python_packages=upload_python_packages,
valid_data=valid_data,
optional_files=train_optional_files,
)
else:
raise RuntimeError(f"unknown train_style {train_style}")
Expand Down Expand Up @@ -441,6 +443,7 @@ def workflow_concurrent_learning(
collect_data_config = config["step_configs"]["collect_data_config"]
cl_step_config = config["step_configs"]["cl_step_config"]
upload_python_packages = config.get("upload_python_packages", None)
train_optional_files = config["train"].get("optional_files", None)

if train_style == "dp":
init_models_paths = config["train"].get("init_models_paths", None)
Expand Down Expand Up @@ -490,6 +493,7 @@ def workflow_concurrent_learning(
cl_step_config=cl_step_config,
upload_python_packages=upload_python_packages,
valid_data=valid_data,
train_optional_files=train_optional_files,
)
scheduler = make_naive_exploration_scheduler(config)

Expand Down
10 changes: 10 additions & 0 deletions dpgen2/fp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
PrepFpOpAbacus,
RunFpOpAbacus,
)
from .cp2k import (
FpOpCp2kInputs,
PrepFpOpCp2k,
RunFpOpCp2k,
)
from .deepmd import (
DeepmdInputs,
PrepDeepmd,
Expand Down Expand Up @@ -40,4 +45,9 @@
"prep": PrepFpOpAbacus,
"run": RunFpOpAbacus,
},
"fpop_cp2k": {
"inputs": FpOpCp2kInputs,
"prep": PrepFpOpCp2k,
"run": RunFpOpCp2k,
},
}
9 changes: 1 addition & 8 deletions dpgen2/fp/abacus.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def execute(
sys = dpdata.LabeledSystem(str(workdir), fmt="abacus/relax")
else:
raise ValueError("Type of calculation %s not supported" % calculation)
out_name = run_config.get("out", fp_default_out_data_name)
out_name = fp_default_out_data_name
sys.to("deepmd/npy", workdir / out_name)

return OPIO(
Expand All @@ -212,13 +212,6 @@ def execute(
@staticmethod
def args():
doc_cmd = "The command of abacus"
doc_out = (
"The output dir name of labeled data. "
"In `deepmd/npy` format provided by `dpdata`."
)
return [
Argument("command", str, optional=True, default="abacus", doc=doc_cmd),
Argument(
"out", str, optional=True, default=fp_default_out_data_name, doc=doc_out
),
]
185 changes: 185 additions & 0 deletions dpgen2/fp/cp2k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import os
from pathlib import (
Path,
)
from typing import (
List,
Optional,
)

import dpdata
from dargs import (
Argument,
)
from dflow.python import (
OP,
OPIO,
Artifact,
BigParameter,
OPIOSign,
)

try:
from fpop.cp2k import (
Cp2kInputs,
PrepCp2k,
RunCp2k,
)
except ModuleNotFoundError:
Cp2kInputs = PrepCp2k = RunCp2k = object

from ..constants import (
fp_default_out_data_name,
)


class FpOpCp2kInputs(Cp2kInputs):  # type: ignore
    """Thin wrapper around fpop's ``Cp2kInputs`` that exposes the dargs schema."""

    @staticmethod
    def args():
        """Return the argument schema: one mandatory ``inp_file`` path."""
        doc = "The path to the user-submitted CP2K input file."
        return [Argument("inp_file", str, optional=False, doc=doc)]


class PrepFpOpCp2k(OP):
    """Prepare CP2K first-principles task directories.

    Prunes atom types with zero atoms from each configuration's type map,
    then delegates actual task-directory creation to fpop's ``PrepCp2k``.
    """

    @classmethod
    def get_input_sign(cls):
        """Inputs: the fp ``config`` dict, the global ``type_map``, and
        the configuration directories (deepmd/npy systems)."""
        return OPIOSign(
            {
                "config": BigParameter(dict),
                "type_map": List[str],
                "confs": Artifact(List[Path]),
            }
        )

    @classmethod
    def get_output_sign(cls):
        """Outputs: the generated task names and task directories."""
        return OPIOSign(
            {
                "task_names": BigParameter(List[str]),
                "task_paths": Artifact(List[Path]),
            }
        )

    @OP.exec_sign_check
    def execute(
        self,
        ip: OPIO,
    ) -> OPIO:
        confs = []
        # CP2K cannot handle atom types that are present in the type map
        # but have zero atoms, so such types are stripped from each system
        # before task preparation.
        for p in ip["confs"]:
            # each directory containing a "type.raw" is one deepmd/npy system
            for f in p.rglob("type.raw"):
                system = f.parent
                s = dpdata.System(system, fmt="deepmd/npy")
                atom_numbs = []
                atom_names = []
                # keep only the types that actually occur in this system
                for numb, name in zip(s["atom_numbs"], s["atom_names"]):  # type: ignore https://github.com/microsoft/pyright/issues/5620
                    if numb > 0:
                        atom_numbs.append(numb)
                        atom_names.append(name)
                if atom_names != s["atom_names"]:
                    # remap per-atom type indices into the pruned name list
                    # (must happen before atom_names is overwritten below)
                    for i, t in enumerate(s["atom_types"]):  # type: ignore https://github.com/microsoft/pyright/issues/5620
                        s["atom_types"][i] = atom_names.index(s["atom_names"][t])  # type: ignore https://github.com/microsoft/pyright/issues/5620
                    s.data["atom_numbs"] = atom_numbs
                    s.data["atom_names"] = atom_names
                    # write the pruned system to a new location so the
                    # original artifact is left untouched
                    # NOTE(review): "output/%s" embeds the full system path;
                    # presumably intentional to keep targets unique — confirm
                    target = "output/%s" % system
                    s.to("deepmd/npy", target)
                    confs.append(Path(target))
                else:
                    # type map already clean; use the system as-is
                    confs.append(system)
        op_in = OPIO(
            {
                "inputs": ip["config"]["inputs"],
                "type_map": ip["type_map"],
                "confs": confs,
                "prep_image_config": ip["config"].get("prep", {}),
            }
        )
        op = PrepCp2k()
        return op.execute(op_in)  # type: ignore in the case of not importing fpop


def get_run_type(lines: List[str]) -> Optional[str]:
    """Return the CP2K ``RUN_TYPE`` value found in *lines*, or ``None``.

    Parameters
    ----------
    lines : List[str]
        The lines of a CP2K input file.

    Returns
    -------
    Optional[str]
        The value following the first active ``RUN_TYPE`` keyword
        (e.g. ``"ENERGY_FORCE"`` or ``"MD"``), or ``None`` if no such
        keyword is present.
    """
    for line in lines:
        if "RUN_TYPE" not in line:
            continue
        stripped = line.strip()
        # CP2K treats '!' and '#' as comment markers; a commented-out
        # RUN_TYPE must not shadow the active one.
        if stripped.startswith(("!", "#")):
            continue
        tokens = stripped.split()
        # Guard against a bare "RUN_TYPE" keyword with no value, which
        # previously returned the keyword itself.
        if len(tokens) >= 2:
            return tokens[-1]
    return None


class RunFpOpCp2k(OP):
    """Run a prepared CP2K task and convert its output to deepmd/npy.

    Delegates execution to fpop's ``RunCp2k``, then parses the CP2K
    output according to the ``RUN_TYPE`` declared in the task's input
    file and writes the labeled data in deepmd/npy format.
    """

    @classmethod
    def get_input_sign(cls):
        """Inputs: the fp ``config`` dict, the task name, and its directory."""
        return OPIOSign(
            {
                "config": BigParameter(dict),
                "task_name": BigParameter(str),
                "task_path": Artifact(Path),
            }
        )

    @classmethod
    def get_output_sign(cls):
        """Outputs: the CP2K log file and the labeled-data directory."""
        return OPIOSign(
            {
                "log": Artifact(Path),
                "labeled_data": Artifact(Path),
            }
        )

    @OP.exec_sign_check
    def execute(
        self,
        ip: OPIO,
    ) -> OPIO:
        run_config = ip["config"].get("run", {})
        op_in = OPIO(
            {
                "task_name": ip["task_name"],
                "task_path": ip["task_path"],
                "backward_list": [],
                "log_name": "output.log",
                "run_image_config": run_config,
            }
        )
        op = RunCp2k()
        op_out = op.execute(op_in)  # type: ignore in the case of not importing fpop
        # the task's working directory is the parent of the backward dir
        workdir = op_out["backward_dir"].parent

        file_path = os.path.join(str(workdir), "output.log")

        # convert the output to deepmd/npy format; the parser depends on
        # the RUN_TYPE declared in the task's CP2K input file
        # NOTE(review): assumes the input file is named "input.inp" —
        # confirm this matches what PrepCp2k writes
        with open(workdir / "input.inp", "r") as f:
            lines = f.readlines()

        # extract RUN_TYPE from the input file
        run_type = get_run_type(lines)

        if run_type == "ENERGY_FORCE":
            sys = dpdata.LabeledSystem(file_path, fmt="cp2kdata/e_f")
        elif run_type == "MD":
            sys = dpdata.LabeledSystem(
                str(workdir), cp2k_output_name="output.log", fmt="cp2kdata/md"
            )
        else:
            raise ValueError(f"Type of calculation {run_type} not supported")

        # the output dir name is fixed (not user-configurable)
        out_name = fp_default_out_data_name
        sys.to("deepmd/npy", workdir / out_name)

        return OPIO(
            {
                "log": workdir / "output.log",
                "labeled_data": workdir / out_name,
            }
        )

    @staticmethod
    def args():
        """Return the dargs schema for the run-time options."""
        doc_cmd = "The command of cp2k"
        return [
            Argument("command", str, optional=True, default="cp2k", doc=doc_cmd),
        ]
5 changes: 5 additions & 0 deletions dpgen2/op/run_dp_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def get_input_sign(cls):
"init_data": Artifact(NestedDict[Path]),
"iter_data": Artifact(List[Path]),
"valid_data": Artifact(List[Path], optional=True),
"optional_files": Artifact(List[Path], optional=True),
}
)

Expand Down Expand Up @@ -328,6 +329,10 @@ def clean_before_quit():
with open(train_script_name, "w") as fp:
json.dump(train_dict, fp, indent=4)

if ip["optional_files"] is not None:
for f in ip["optional_files"]:
Path(f.name).symlink_to(f)

# train model
command = _make_train_command(
dp_command,
Expand Down
6 changes: 6 additions & 0 deletions dpgen2/superop/prep_run_dp_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
run_config: dict = normalize_step_dict({}),
upload_python_packages: Optional[List[os.PathLike]] = None,
valid_data: Optional[S3Artifact] = None,
optional_files: Optional[List[str]] = None,
):
self._input_parameters = {
"block_id": InputParameter(type=str, value=""),
Expand Down Expand Up @@ -119,6 +120,7 @@ def __init__(
run_config=run_config,
upload_python_packages=upload_python_packages,
valid_data=valid_data,
optional_files=optional_files,
)

@property
Expand Down Expand Up @@ -151,6 +153,7 @@ def _prep_run_dp_train(
run_config: dict = normalize_step_dict({}),
upload_python_packages: Optional[List[os.PathLike]] = None,
valid_data: Optional[S3Artifact] = None,
optional_files: Optional[List[str]] = None,
):
prep_config = deepcopy(prep_config)
run_config = deepcopy(run_config)
Expand Down Expand Up @@ -206,6 +209,9 @@ def _prep_run_dp_train(
"init_data": train_steps.inputs.artifacts["init_data"],
"iter_data": train_steps.inputs.artifacts["iter_data"],
"valid_data": valid_data,
"optional_files": upload_artifact(optional_files)
if optional_files is not None
else None,
},
with_sequence=argo_sequence(
argo_len(prep_train.outputs.parameters["task_names"]),
Expand Down
Loading

0 comments on commit fdefc70

Please sign in to comment.