Skip to content

Commit

Permalink
Add a more detailed introduction for model compression (#772)
Browse files Browse the repository at this point in the history
  • Loading branch information
denghuilu authored Jun 20, 2021
1 parent f326a86 commit 44d49f3
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 36 deletions.
16 changes: 8 additions & 8 deletions deepmd/entrypoints/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def compress(
input: str,
output: str,
extrapolate: int,
stride: float,
step: float,
frequency: str,
checkpoint_folder: str,
mpi_log: str,
Expand All @@ -34,9 +34,9 @@ def compress(
"""Compress model.
The table is composed of fifth-order polynomial coefficients and is assembled from
two sub-tables. The first table takes the stride(parameter) as it's uniform stride,
while the second table takes 10 * stride as it's uniform stride. The range of the
first table is automatically detected by deepmd-kit, while the second table ranges
two sub-tables. The first table takes the step parameter as the domain's uniform step size,
while the second table takes 10 * step as its uniform step size. The range of the
first table is automatically detected by the code, while the second table ranges
from the first table's upper boundary(upper) to the extrapolate(parameter) * upper.
Parameters
Expand All @@ -49,8 +49,8 @@ def compress(
compressed model filename
extrapolate : int
scale of model extrapolation
stride : float
uniform stride of tabulation's first table
step : float
uniform step size of the tabulation's first table
frequency : str
frequency of tabulation overflow check
checkpoint_folder : str
Expand All @@ -71,8 +71,8 @@ def compress(
jdata["model"]["compress"]["model_file"] = input
jdata["model"]["compress"]["table_config"] = [
extrapolate,
stride,
10 * stride,
step,
10 * step,
int(frequency),
]
# be careful here, if one want to refine the model
Expand Down
35 changes: 21 additions & 14 deletions deepmd/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ def parse_args(args: Optional[List[str]] = None):
# * compress model *****************************************************************
# Compress a model, which includes tabulating the embedding-net.
# The table is composed of fifth-order polynomial coefficients and is assembled
# from two sub-tables. The first table takes the stride(parameter) as it's uniform
# stride, while the second table takes 10 * stride as it\s uniform stride
# from two sub-tables. The first table takes the step(parameter) as its uniform
# step, while the second table takes 10 * step as its uniform step.
# The range of the first table is automatically detected by deepmd-kit, while the
# second table ranges from the first table's upper boundary(upper) to the
# extrapolate(parameter) * upper.
Expand All @@ -263,36 +263,43 @@ def parse_args(args: Optional[List[str]] = None):
"--input",
default="frozen_model.pb",
type=str,
help="The original frozen model, which will be compressed by the deepmd-kit",
help="The original frozen model, which will be compressed by the code",
)
parser_compress.add_argument(
"-o",
"--output",
default="frozen_model_compress.pb",
default="frozen_model_compressed.pb",
type=str,
help="The compressed model",
)
parser_compress.add_argument(
"-s",
"--step",
default=0.01,
type=float,
help="Model compression uses fifth-order polynomials to interpolate the embedding-net. "
"It introduces two tables with different step size to store the parameters of the polynomials. "
"The first table covers the range of the training data, while the second table is an extrapolation of the training data. "
"The domain of each table is uniformly divided by a given step size. "
"And the step(parameter) denotes the step size of the first table and the second table will "
"use 10 * step as it's step size to save the memory. "
"Usually the value ranges from 0.1 to 0.001. "
"Smaller step means higher accuracy and bigger model size",
)
parser_compress.add_argument(
"-e",
"--extrapolate",
default=5,
type=int,
help="The scale of model extrapolation",
)
parser_compress.add_argument(
"-s",
"--stride",
default=0.01,
type=float,
help="The uniform stride of tabulation's first table, the second table will "
"use 10 * stride as it's uniform stride",
help="The domain range of the first table is automatically detected by the code: [d_low, d_up]. "
"While the second table ranges from the first table's upper boundary(d_up) to the extrapolate(parameter) * d_up: [d_up, extrapolate * d_up]",
)
parser_compress.add_argument(
"-f",
"--frequency",
default=-1,
type=int,
help="The frequency of tabulation overflow check(If the input environment "
help="The frequency of tabulation overflow check(Whether the input environment "
"matrix overflow the first or second table range). "
"By default do not check the overflow",
)
Expand Down
47 changes: 37 additions & 10 deletions doc/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,23 +244,50 @@ positional arguments:

optional arguments:
-h, --help show this help message and exit
-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}, --log-level {DEBUG,3,INFO,2,WARNING,1,ERROR,0}
set verbosity level by string or number, 0=ERROR,
1=WARNING, 2=INFO and 3=DEBUG (default: INFO)
-l LOG_PATH, --log-path LOG_PATH
set log file to log messages to disk, if not
specified, the logs will only be output to console
(default: None)
-m {master,collect,workers}, --mpi-log {master,collect,workers}
Set the manner of logging when running with MPI.
'master' logs only on main process, 'collect'
broadcasts logs from workers to master and 'workers'
means each process will output its own log (default:
master)
-i INPUT, --input INPUT
The original frozen model, which will be compressed by
the deepmd-kit
the code (default: frozen_model.pb)
-o OUTPUT, --output OUTPUT
The compressed model
The compressed model (default:
frozen_model_compressed.pb)
-s STEP, --step STEP Model compression uses fifth-order polynomials to
interpolate the embedding-net. It introduces two
tables with different step size to store the
parameters of the polynomials. The first table covers
the range of the training data, while the second table
is an extrapolation of the training data. The domain
of each table is uniformly divided by a given step
size. And the step(parameter) denotes the step size of
the first table and the second table will use 10 *
step as it's step size to save the memory. Usually the
value ranges from 0.1 to 0.001. Smaller step means
higher accuracy and bigger model size (default: 0.01)
-e EXTRAPOLATE, --extrapolate EXTRAPOLATE
The scale of model extrapolation
-s STRIDE, --stride STRIDE
The uniform stride of tabulation's first table, the
second table will use 10 * stride as it's uniform
stride
The domain range of the first table is automatically
detected by the code: [d_low, d_up]. While the second
table ranges from the first table's upper
boundary(d_up) to the extrapolate(parameter) * d_up:
[d_up, extrapolate * d_up] (default: 5)
-f FREQUENCY, --frequency FREQUENCY
The frequency of tabulation overflow check(If the
The frequency of tabulation overflow check(Whether the
input environment matrix overflow the first or second
table range). By default do not check the overflow
-d FOLDER, --folder FOLDER
path to checkpoint folder
(default: -1)
-c CHECKPOINT_FOLDER, --checkpoint-folder CHECKPOINT_FOLDER
path to checkpoint folder (default: .)
```
**Parameter explanation**
Expand Down
8 changes: 4 additions & 4 deletions source/tests/test_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,10 @@ def test_parser_compress(self):
ARGS = {
"INPUT": dict(type=str, value="INFILE"),
"--output": dict(type=str, value="OUTFILE"),
"--extrapolate": dict(type=int, value=10),
"--stride": dict(type=float, value=0.1),
"--frequency": dict(type=int, value=1),
"--checkpoint-folder": dict(type=str, value="FOLDER"),
"--extrapolate": dict(type=int, value=5),
"--step": dict(type=float, value=0.1),
"--frequency": dict(type=int, value=-1),
"--checkpoint-folder": dict(type=str, value="."),
}

self.run_test(command="compress", mapping=ARGS)
Expand Down

0 comments on commit 44d49f3

Please sign in to comment.