diff --git a/.git_bin_path b/.git_bin_path
index 925d16083..8f75b6292 100644
--- a/.git_bin_path
+++ b/.git_bin_path
@@ -1,7 +1,7 @@
{"leaf_name": "data/test", "leaf_file": ["data/test/batch_criteo_sample.tfrecord", "data/test/criteo_sample.tfrecord", "data/test/dwd_avazu_ctr_deepmodel_10w.csv", "data/test/embed_data.csv", "data/test/lookup_data.csv", "data/test/tag_kv_data.csv", "data/test/test.csv", "data/test/test_sample_weight.txt", "data/test/test_with_quote.csv"]}
{"leaf_name": "data/test/export", "leaf_file": ["data/test/export/data.csv"]}
{"leaf_name": "data/test/hpo_test/eval_val", "leaf_file": ["data/test/hpo_test/eval_val/events.out.tfevents.1597889819.j63d04245.sqa.eu95"]}
-{"leaf_name": "data/test/inference", "leaf_file": ["data/test/inference/lookup_data_test80.csv", "data/test/inference/taobao_infer_data.txt"]}
+{"leaf_name": "data/test/inference", "leaf_file": ["data/test/inference/lookup_data_test80.csv", "data/test/inference/taobao_infer_data.txt", "data/test/inference/taobao_infer_rtp_data.txt"]}
{"leaf_name": "data/test/inference/fg_export_multi", "leaf_file": ["data/test/inference/fg_export_multi/saved_model.pb"]}
{"leaf_name": "data/test/inference/fg_export_multi/assets", "leaf_file": ["data/test/inference/fg_export_multi/assets/pipeline.config"]}
{"leaf_name": "data/test/inference/fg_export_multi/variables", "leaf_file": ["data/test/inference/fg_export_multi/variables/variables.data-00000-of-00001", "data/test/inference/fg_export_multi/variables/variables.index"]}
@@ -20,6 +20,9 @@
{"leaf_name": "data/test/inference/tb_multitower_placeholder_rename_export", "leaf_file": ["data/test/inference/tb_multitower_placeholder_rename_export/saved_model.pb"]}
{"leaf_name": "data/test/inference/tb_multitower_placeholder_rename_export/assets", "leaf_file": ["data/test/inference/tb_multitower_placeholder_rename_export/assets/pipeline.config"]}
{"leaf_name": "data/test/inference/tb_multitower_placeholder_rename_export/variables", "leaf_file": ["data/test/inference/tb_multitower_placeholder_rename_export/variables/variables.data-00000-of-00001", "data/test/inference/tb_multitower_placeholder_rename_export/variables/variables.index"]}
+{"leaf_name": "data/test/inference/tb_multitower_rtp_export", "leaf_file": ["data/test/inference/tb_multitower_rtp_export/saved_model.pb"]}
+{"leaf_name": "data/test/inference/tb_multitower_rtp_export/assets", "leaf_file": ["data/test/inference/tb_multitower_rtp_export/assets/pipeline.config"]}
+{"leaf_name": "data/test/inference/tb_multitower_rtp_export/variables", "leaf_file": ["data/test/inference/tb_multitower_rtp_export/variables/variables.data-00000-of-00001", "data/test/inference/tb_multitower_rtp_export/variables/variables.index"]}
{"leaf_name": "data/test/latest_ckpt_test", "leaf_file": ["data/test/latest_ckpt_test/model.ckpt-500.data-00000-of-00001", "data/test/latest_ckpt_test/model.ckpt-500.index", "data/test/latest_ckpt_test/model.ckpt-500.meta"]}
{"leaf_name": "data/test/rtp", "leaf_file": ["data/test/rtp/taobao_fg_pred.out", "data/test/rtp/taobao_test_bucketize_feature.txt", "data/test/rtp/taobao_test_feature.txt", "data/test/rtp/taobao_test_input.txt", "data/test/rtp/taobao_train_bucketize_feature.txt", "data/test/rtp/taobao_train_feature.txt", "data/test/rtp/taobao_train_input.txt", "data/test/rtp/taobao_valid.csv", "data/test/rtp/taobao_valid_feature.txt"]}
{"leaf_name": "data/test/tb_data", "leaf_file": ["data/test/tb_data/taobao_ad_feature_gl", "data/test/tb_data/taobao_clk_edge_gl", "data/test/tb_data/taobao_multi_seq_test_data", "data/test/tb_data/taobao_multi_seq_train_data", "data/test/tb_data/taobao_noclk_edge_gl", "data/test/tb_data/taobao_test_data", "data/test/tb_data/taobao_test_data_for_expr", "data/test/tb_data/taobao_test_data_kd", "data/test/tb_data/taobao_train_data", "data/test/tb_data/taobao_train_data_for_expr", "data/test/tb_data/taobao_train_data_kd", "data/test/tb_data/taobao_user_profile_gl"]}
diff --git a/.git_bin_url b/.git_bin_url
index 1d0cd2136..c8394cd8a 100644
--- a/.git_bin_url
+++ b/.git_bin_url
@@ -1,7 +1,7 @@
{"leaf_path": "data/test", "sig": "656d73b4e78d0d71e98120050bc51387", "remote_path": "data/git_oss_sample_data/data_test_656d73b4e78d0d71e98120050bc51387"}
{"leaf_path": "data/test/export", "sig": "c2e5ad1e91edb55b215ea108b9f14537", "remote_path": "data/git_oss_sample_data/data_test_export_c2e5ad1e91edb55b215ea108b9f14537"}
{"leaf_path": "data/test/hpo_test/eval_val", "sig": "fef5f6cd659c35b713c1b8bcb97c698f", "remote_path": "data/git_oss_sample_data/data_test_hpo_test_eval_val_fef5f6cd659c35b713c1b8bcb97c698f"}
-{"leaf_path": "data/test/inference", "sig": "e2c4b0f07ff8568eb7b8e1819326d296", "remote_path": "data/git_oss_sample_data/data_test_inference_e2c4b0f07ff8568eb7b8e1819326d296"}
+{"leaf_path": "data/test/inference", "sig": "9725274cad0f27baf561ebfaf7946846", "remote_path": "data/git_oss_sample_data/data_test_inference_9725274cad0f27baf561ebfaf7946846"}
{"leaf_path": "data/test/inference/fg_export_multi", "sig": "c6690cef053aed9e2011bbef90ef33e7", "remote_path": "data/git_oss_sample_data/data_test_inference_fg_export_multi_c6690cef053aed9e2011bbef90ef33e7"}
{"leaf_path": "data/test/inference/fg_export_multi/assets", "sig": "7fe7a4525f5d46cc763172f5200e96e0", "remote_path": "data/git_oss_sample_data/data_test_inference_fg_export_multi_assets_7fe7a4525f5d46cc763172f5200e96e0"}
{"leaf_path": "data/test/inference/fg_export_multi/variables", "sig": "1f9aad9744382c6d5b5f152d556d9b30", "remote_path": "data/git_oss_sample_data/data_test_inference_fg_export_multi_variables_1f9aad9744382c6d5b5f152d556d9b30"}
@@ -20,6 +20,9 @@
{"leaf_path": "data/test/inference/tb_multitower_placeholder_rename_export", "sig": "dc05357e52fd574cba48165bc67af906", "remote_path": "data/git_oss_sample_data/data_test_inference_tb_multitower_placeholder_rename_export_dc05357e52fd574cba48165bc67af906"}
{"leaf_path": "data/test/inference/tb_multitower_placeholder_rename_export/assets", "sig": "750925c4866bf1db8c3188f604271c72", "remote_path": "data/git_oss_sample_data/data_test_inference_tb_multitower_placeholder_rename_export_assets_750925c4866bf1db8c3188f604271c72"}
{"leaf_path": "data/test/inference/tb_multitower_placeholder_rename_export/variables", "sig": "56850b4506014ce1bd3ba9b6d60e2770", "remote_path": "data/git_oss_sample_data/data_test_inference_tb_multitower_placeholder_rename_export_variables_56850b4506014ce1bd3ba9b6d60e2770"}
+{"leaf_path": "data/test/inference/tb_multitower_rtp_export", "sig": "f1bc6238cfab648812afca093da5dd6b", "remote_path": "data/git_oss_sample_data/data_test_inference_tb_multitower_rtp_export_f1bc6238cfab648812afca093da5dd6b"}
+{"leaf_path": "data/test/inference/tb_multitower_rtp_export/assets", "sig": "ae1cc9ec956fb900e5df45c4ec255c4b", "remote_path": "data/git_oss_sample_data/data_test_inference_tb_multitower_rtp_export_assets_ae1cc9ec956fb900e5df45c4ec255c4b"}
+{"leaf_path": "data/test/inference/tb_multitower_rtp_export/variables", "sig": "efe52ef308fd6452f3b67fd04cdd22bd", "remote_path": "data/git_oss_sample_data/data_test_inference_tb_multitower_rtp_export_variables_efe52ef308fd6452f3b67fd04cdd22bd"}
{"leaf_path": "data/test/latest_ckpt_test", "sig": "d41d8cd98f00b204e9800998ecf8427e", "remote_path": "data/git_oss_sample_data/data_test_latest_ckpt_test_d41d8cd98f00b204e9800998ecf8427e"}
{"leaf_path": "data/test/rtp", "sig": "76cda60582617ddbb7cd5a49eb68a4b9", "remote_path": "data/git_oss_sample_data/data_test_rtp_76cda60582617ddbb7cd5a49eb68a4b9"}
{"leaf_path": "data/test/tb_data", "sig": "c8136915b6e5e9d96b18448cf2e21d3d", "remote_path": "data/git_oss_sample_data/data_test_tb_data_c8136915b6e5e9d96b18448cf2e21d3d"}
diff --git a/docs/source/models/mind.md b/docs/source/models/mind.md
index 07f0a63bf..84037fa34 100644
--- a/docs/source/models/mind.md
+++ b/docs/source/models/mind.md
@@ -85,7 +85,7 @@ model_config:{
      # use the same number of capsules for all users
const_caps_num: true
}
-
+
simi_pow: 20
l2_regularization: 1e-6
time_id_fea: "seq_ts_gap"
@@ -101,7 +101,7 @@ model_config:{
- dnn:
- hidden_units: dnn每一层的channel数
- use_bn: 是否使用batch_norm, 默认是true
-- item_dnn: item侧的dnn参数, 配置同user_dnn
+- item_dnn: item侧的dnn参数, 配置同user_dnn
- note: item侧不能用batch_norm
- pre_capsule_dnn: 进入capsule之前的dnn的配置
- 可选, 配置同user_dnn和item_dnn
@@ -117,7 +117,7 @@ model_config:{
- squash_pow: 对squash加的power, 防止squash之后的向量值变得太小
- simi_pow: 对相似度做的倍数, 放大interests之间的差异
- embedding_regularization: 对embedding部分加regularization,防止overfit
-- user_seq_combine:
+- user_seq_combine:
- CONCAT: 多个seq之间采取concat的方式融合
- SUM: 多个seq之间采取sum的方式融合, default是SUM
- time_id_fea: time_id feature的name, 对应feature_config里面定义的特征
@@ -128,6 +128,7 @@ model_config:{
- 行为序列特征可以加上time_id, time_id经过1 dimension的embedding后, 在time维度进行softmax, 然后和其它sequence feature的embedding相乘
- time_id取值的方式可参考:
+
- 训练数据: Math.round((2 * Math.log1p((labelTime - itemTime) / 60.) / Math.log(2.))) + 1
- inference: Math.round((2 * Math.log1p((currentTime - itemTime) / 60.) / Math.log(2.))) + 1
- 此处的时间(labelTime, itemTime, currentTime) 为seconds
@@ -136,17 +137,19 @@ model_config:{
- 使用增量训练,增量训练可以防止负采样的穿越。
-- 使用HPO对squash_pow[0.1 - 1.0]和simi_pow[10 - 100]进行搜索调优。
+- 使用HPO对squash_pow\[0.1 - 1.0\]和simi_pow\[10 - 100\]进行搜索调优。
- 要看的指标是召回率,准确率和兴趣损失,三个指标要一起看。
- 使用全网的点击数据来生成训练样本,全网的行为会更加丰富,这有利于mind模型的训练。
- 数据清洗:
+
- 把那些行为太少的item直接在构造行为序列的时候就挖掉
- 排除爬虫或者作弊用户
- 数据采样:
+
- mind模型的训练默认是以点击为目标
- 如果业务指标是到交易,那么可以对交易的样本重采样
@@ -155,9 +158,11 @@ model_config:{
[MIND_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/mind_on_taobao_neg_sam.config)
### 效果评估
+
离线的效果评估主要看在测试集上的hitrate. 可以参考文档[效果评估](https://easyrec.oss-cn-beijing.aliyuncs.com/docs/recall_eval.pdf)
#### 评估sql
+
```sql
pai -name tensorflow1120_cpu_ext
-Dscript='oss://easyrec/deploy/easy_rec/python/tools/hitrate.py'
@@ -204,15 +209,18 @@ pai -name tensorflow1120_cpu_ext
- 1: Inner Product similarity
- emb_dim: user / item表征向量的维度
- top_k: knn检索取top_k计算hitrate
-- recall_type:
+- recall_type:
- u2i: user to item retrieval
#### 评估结果
+
输出下面两张表
- mind_hitrate_details:
+
- 输出每一个user的hitrate = user_hits / user_recalls
- 格式如下:
+
```text
id : bigint
topk_ids : string
@@ -221,10 +229,12 @@ pai -name tensorflow1120_cpu_ext
bad_ids : string
bad_dists : string
```
-
+
- mind_total_hitrate:
+
- 输出平均hitrate = SUM(user_hits) / SUM(user_recalls)
- 格式如下:
+
```text
hitrate : double
```
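The `time_id` bucketing described in the mind.md hunk above maps the gap between the reference time (labelTime during training, currentTime at inference) and the item's behaviour time into a small integer id. Below is a minimal Python sketch of that formula, assuming all timestamps are in seconds as the document states; the function name `time_id_bucket` is illustrative and not part of EasyRec's API.

```python
import math


def time_id_bucket(ref_time_s, item_time_s):
    """Bucketize the behaviour-time gap into a time_id.

    Mirrors the doc's formula:
      round(2 * log1p((refTime - itemTime) / 60) / log(2)) + 1
    ref_time_s is labelTime at training time and currentTime at inference;
    both timestamps are in seconds.
    (Python's round() uses banker's rounding on exact .5 ties, unlike JS Math.round.)
    """
    gap_minutes = (ref_time_s - item_time_s) / 60.0
    return round(2 * math.log1p(gap_minutes) / math.log(2.0)) + 1


# e.g. an item clicked one hour before the label event:
# time_id_bucket(3600, 0) -> round(2 * log1p(60) / log(2)) + 1 = 13
```

Because the same bucketing is applied with labelTime offline and currentTime online, the feature keeps a consistent meaning between training and serving.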
diff --git a/docs/source/pre_check.md b/docs/source/pre_check.md
index efc476594..1a1d5216a 100644
--- a/docs/source/pre_check.md
+++ b/docs/source/pre_check.md
@@ -3,12 +3,12 @@
为解决用户常由于脏数据或配置错误的原因,导致训练失败,开发了预检查功能。
在训练时打开检查模式,或是训练前执行pre_check脚本,即会检查data_config配置及train_config部分配置,筛查全部数据,遇到异常则抛出相关信息,并给出修改意见。
-
### 命令
#### Local
方式一: 执行pre_check脚本:
+
```bash
PYTHONPATH=. python easy_rec/python/tools/pre_check.py --pipeline_config_path samples/model_config/din_on_taobao.config --data_input_path data/test/check_data/csv_data_for_check
```
@@ -16,17 +16,19 @@ PYTHONPATH=. python easy_rec/python/tools/pre_check.py --pipeline_config_path sa
方式二: 训练时打开检查模式(默认关闭):
该方式会影响训练速度,线上例行训练时不建议开启检查模式。
+
```bash
python -m easy_rec.python.train_eval --pipeline_config_path samples/model_config/din_on_taobao.config --check_mode
```
+
- pipeline_config_path config文件路径
- data_input_path 待检查的数据路径,不指定的话为pipeline_config_path中的train_input_path及eval_input_path
- check_mode 默认False
-
#### On PAI
方式一: 执行pre_check脚本:
+
```sql
pai -name easy_rec_ext -project algo_public
-Dcmd='check'
@@ -42,6 +44,7 @@ pai -name easy_rec_ext -project algo_public
方式二: 训练时打开检查模式(默认关闭):
该方式会影响训练速度,线上例行训练时不建议开启检查模式。
+
```sql
pai -name easy_rec_ext -project algo_public
-Dcmd='train'
diff --git a/docs/source/proto.html b/docs/source/proto.html
index ed7b5fde0..92c119f74 100644
--- a/docs/source/proto.html
+++ b/docs/source/proto.html
@@ -63,57 +63,57 @@
}
td p:nth-child(1) {
- text-indent: 0;
+ text-indent: 0;
}
-
- .field-table td:nth-child(1) {
+
+ .field-table td:nth-child(1) {
width: 10em;
}
- .field-table td:nth-child(2) {
+ .field-table td:nth-child(2) {
width: 10em;
}
- .field-table td:nth-child(3) {
+ .field-table td:nth-child(3) {
width: 6em;
}
- .field-table td:nth-child(4) {
+ .field-table td:nth-child(4) {
width: auto;
}
-
- .extension-table td:nth-child(1) {
+
+ .extension-table td:nth-child(1) {
width: 10em;
}
- .extension-table td:nth-child(2) {
+ .extension-table td:nth-child(2) {
width: 10em;
}
- .extension-table td:nth-child(3) {
+ .extension-table td:nth-child(3) {
width: 10em;
}
- .extension-table td:nth-child(4) {
+ .extension-table td:nth-child(4) {
width: 5em;
}
- .extension-table td:nth-child(5) {
+ .extension-table td:nth-child(5) {
width: auto;
}
-
- .enum-table td:nth-child(1) {
+
+ .enum-table td:nth-child(1) {
width: 10em;
}
- .enum-table td:nth-child(2) {
+ .enum-table td:nth-child(2) {
width: 10em;
}
- .enum-table td:nth-child(3) {
+ .enum-table td:nth-child(3) {
width: auto;
}
-
+
.scalar-value-types-table tr {
height: 3em;
}
-
+
#toc-container ul {
list-style-type: none;
padding-left: 1em;
@@ -124,7 +124,7 @@
font-weight: bold;
}
-
+
.file-heading {
width: 100%;
display: table;
@@ -140,7 +140,7 @@
display: table-cell;
}
-
+
.badge {
width: 1.6em;
height: 1.6em;
@@ -160,7 +160,7 @@
}
-
+
@@ -172,1076 +172,1076 @@
Table of Contents
-
-
+
+
easy_rec/python/protos/autoint.proto Top
-
+
AutoInt
-
+
Field Type Label Description
-
+
multi_head_num
uint32
required
The number of heads Default: 1
-
+
multi_head_size
uint32
required
The dimension of heads
-
+
interacting_layer_num
uint32
required
The number of interacting layers Default: 1
-
+
l2_regularization
float
required
Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/collaborative_metric_learning.proto Top
-
+
CoMetricLearningI2I
-
+
Field Type Label Description
-
+
session_id
string
optional
-
+
highway
HighWayTower
repeated
-
+
input
string
optional
-
+
dnn
DNN
required
-
+
l2_regularization
float
required
Default: 0.0001
-
+
output_l2_normalized_emb
bool
required
Default: true
-
+
sample_id
string
optional
-
+
circle_loss
CircleLoss
optional
-
+
multi_similarity_loss
MultiSimilarityLoss
optional
-
+
item_id
string
optional
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dataset.proto Top
-
+
DatasetConfig
-
+
Field Type Label Description
-
+
batch_size
uint32
optional
mini batch size to use for training and evaluation. Default: 32
-
+
auto_expand_input_fields
bool
@@ -1249,7 +1249,7 @@ DatasetConfig
set auto_expand_input_fields to true to
auto_expand field[1-21] to field1, field2, ..., field21 Default: false
-
+
label_fields
string
@@ -1258,14 +1258,14 @@ DatasetConfig
For multiple target models such as MMOE
multiple label_fields will be set.
-
+
label_sep
string
repeated
label separator
-
+
label_dim
uint32
@@ -1273,14 +1273,14 @@ DatasetConfig
label dimensions which need to be set when there
are labels have dimension > 1
-
+
shuffle
bool
optional
whether to shuffle data Default: true
-
+
shuffle_buffer_size
int32
@@ -1289,7 +1289,7 @@ DatasetConfig
it is suggested to do full data shuffle before training
especially when the performance of models is not good. Default: 32
-
+
num_epochs
uint32
@@ -1297,28 +1297,28 @@ DatasetConfig
The number of times a data source is read. If set to zero, the data source
will be reused indefinitely. Default: 0
-
+
prefetch_size
uint32
optional
Number of decoded batches to prefetch. Default: 32
-
+
shard
bool
optional
shard dataset to 1/num_workers in distribute mode Default: false
-
+
input_type
DatasetConfig.InputType
required
-
+
separator
string
@@ -1332,7 +1332,7 @@ DatasetConfig
for RTPInput and OdpsRTPInput it is usually set
to '\002' Default: ,
-
+
num_parallel_calls
uint32
@@ -1341,7 +1341,7 @@ DatasetConfig
or too large numbers(suggested be to small than
number of the cores) Default: 8
-
+
selected_cols
string
@@ -1351,7 +1351,7 @@ DatasetConfig
such as '1,2,4', where 1,2 are label columns, and
4 is the feature column, column 0,3 are not used,
-
+
selected_col_types
string
@@ -1359,7 +1359,7 @@ DatasetConfig
selected col types, only used for OdpsInput/OdpsInputV2
to avoid error setting of data types
-
+
input_fields
DatasetConfig.Field
@@ -1367,14 +1367,14 @@ DatasetConfig
the input fields must be the same number and in the
same order as data in csv files or odps tables
-
+
rtp_separator
string
optional
for RTPInput only Default: ;
-
+
ignore_error
bool
@@ -1382,7 +1382,7 @@ DatasetConfig
ignore some data errors
it is not suggested to set this parameter Default: false
-
+
pai_worker_queue
bool
@@ -1390,14 +1390,14 @@ DatasetConfig
whether to use pai global shuffle queue, only for OdpsInput,
OdpsInputV2, OdpsRTPInputV2 Default: false
-
+
pai_worker_slice_num
int32
optional
Default: 100
-
+
chief_redundant
bool
@@ -1405,28 +1405,28 @@ DatasetConfig
if true, one worker will duplicate the data of the chief node
and undertake the gradient computation of the chief node Default: false
-
+
sample_weight
string
optional
input field for sample weight
-
+
data_compression_type
string
optional
the compression type of tfrecord
-
+
n_data_batch_tfrecord
uint32
optional
n data for one feature in tfrecord
-
+
with_header
bool
@@ -1436,118 +1436,118 @@ DatasetConfig
and the number and the order of input_fields
may not be the same as that in csv files. Default: false
-
+
negative_sampler
NegativeSampler
optional
-
+
negative_sampler_v2
NegativeSamplerV2
optional
-
+
hard_negative_sampler
HardNegativeSampler
optional
-
+
hard_negative_sampler_v2
HardNegativeSamplerV2
optional
-
+
negative_sampler_in_memory
NegativeSamplerInMemory
optional
-
+
eval_batch_size
uint32
optional
Default: 4096
-
+
-
-
-
+
+
+
DatasetConfig.Field
-
+
Field Type Label Description
-
+
input_name
string
required
-
+
input_type
DatasetConfig.FieldType
required
Default: STRING
-
+
default_val
string
optional
-
+
input_dim
uint32
optional
Default: 1
-
+
input_shape
uint32
optional
Default: 1
-
+
-
-
-
+
+
+
HardNegativeSampler
Weighted Random Sampling ItemID not in Batch and Sampling Hard Edge
-
+
Field Type Label Description
-
+
user_input_path
string
@@ -1555,7 +1555,7 @@ HardNegativeSampler
user data path
userid weight
-
+
item_input_path
string
@@ -1563,7 +1563,7 @@ HardNegativeSampler
item data path
itemid weight attrs
-
+
hard_neg_edge_input_path
string
@@ -1571,73 +1571,73 @@ HardNegativeSampler
hard negative edge path
userid itemid weight
-
+
num_sample
uint32
required
number of negative sample
-
+
num_hard_sample
uint32
required
max number of hard negative sample
-
+
attr_fields
string
repeated
field names of attrs in train data or eval data
-
+
item_id_field
string
required
field name of item_id in train data or eval data
-
+
user_id_field
string
required
field name of user_id in train data or eval data
-
+
attr_delimiter
string
optional
Default: :
-
+
num_eval_sample
uint32
optional
Default: 0
-
+
-
-
-
+
+
+
HardNegativeSamplerV2
Weighted Random Sampling ItemID not with Edge and Sampling Hard Edge
-
+
Field Type Label Description
-
+
user_input_path
string
@@ -1645,7 +1645,7 @@ HardNegativeSamplerV2
user data path
userid weight
-
+
item_input_path
string
@@ -1653,7 +1653,7 @@ HardNegativeSamplerV2
item data path
itemid weight attrs
-
+
pos_edge_input_path
string
@@ -1661,7 +1661,7 @@ HardNegativeSamplerV2
positive edge path
userid itemid weight
-
+
hard_neg_edge_input_path
string
@@ -1669,73 +1669,73 @@ HardNegativeSamplerV2
hard negative edge path
userid itemid weight
-
+
num_sample
uint32
required
number of negative sample
-
+
num_hard_sample
uint32
required
max number of hard negative sample
-
+
attr_fields
string
repeated
field names of attrs in train data or eval data
-
+
item_id_field
string
required
field name of item_id in train data or eval data
-
+
user_id_field
string
required
field name of user_id in train data or eval data
-
+
attr_delimiter
string
optional
Default: :
-
+
num_eval_sample
uint32
optional
Default: 0
-
+
-
-
-
+
+
+
NegativeSampler
Weighted Random Sampling ItemID not in Batch
-
+
Field Type Label Description
-
+
input_path
string
@@ -1743,59 +1743,59 @@ NegativeSampler
sample data path
itemid weight attrs
-
+
num_sample
uint32
required
number of negative sample
-
+
attr_fields
string
repeated
field names of attrs in train data or eval data
-
+
item_id_field
string
required
field name of item_id in train data or eval data
-
+
attr_delimiter
string
optional
Default: :
-
+
num_eval_sample
uint32
optional
Default: 0
-
+
-
-
-
+
+
+
NegativeSamplerInMemory
-
+
Field Type Label Description
-
+
input_path
string
@@ -1803,59 +1803,59 @@ NegativeSamplerInMemory
sample data path
itemid weight attrs
-
+
num_sample
uint32
required
number of negative sample
-
+
attr_fields
string
repeated
field names of attrs in train data or eval data
-
+
item_id_field
string
required
field name of item_id in train data or eval data
-
+
attr_delimiter
string
optional
Default: :
-
+
num_eval_sample
uint32
optional
Default: 0
-
+
-
-
-
+
+
+
NegativeSamplerV2
Weighted Random Sampling ItemID not with Edge
-
+
Field Type Label Description
-
+
user_input_path
string
@@ -1863,7 +1863,7 @@ NegativeSamplerV2
user data path
userid weight
-
+
item_input_path
string
@@ -1871,7 +1871,7 @@ NegativeSamplerV2
item data path
itemid weight attrs
-
+
pos_edge_input_path
string
@@ -1879,58 +1879,58 @@ NegativeSamplerV2
positive edge path
userid itemid weight
-
+
num_sample
uint32
required
number of negative sample
-
+
attr_fields
string
repeated
field names of attrs in train data or eval data
-
+
item_id_field
string
required
field name of item_id in train data or eval data
-
+
user_id_field
string
required
field name of user_id in train data or eval data
-
+
attr_delimiter
string
optional
Default: :
-
+
num_eval_sample
uint32
optional
Default: 0
-
+
-
-
-
-
+
+
+
+
DatasetConfig.FieldType
@@ -1938,46 +1938,46 @@ DatasetConfig.FieldType
Name Number Description
-
+
INT32
0
-
+
INT64
1
-
+
STRING
2
-
+
FLOAT
4
-
+
DOUBLE
5
-
+
BOOL
6
-
+
-
+
@@ -1985,878 +1985,878 @@
Name Number Description
-
+
CSVInput
10
csv format input, could be used in local or hdfs
-
+
CSVInputV2
11
@Depreciated
-
+
CSVInputEx
12
extended csv format, allow quote in fields
-
+
OdpsInput
2
@Depreciated, has memory leak problem
-
+
OdpsInputV2
3
odps input, used on pai
-
+
DataHubInput
15
-
+
OdpsInputV3
9
-
+
RTPInput
4
-
+
RTPInputV2
5
-
+
OdpsRTPInput
601
-
+
OdpsRTPInputV2
602
-
+
TFRecordInput
7
-
+
BatchTFRecordInput
14
-
+
DummyInput
8
for the purpose to debug performance bottleneck of
input pipelines
-
+
KafkaInput
13
-
+
HiveInput
16
-
+
CriteoInput
1001
-
+
-
-
-
-
-
+
+
+
+
+
easy_rec/python/protos/data_source.proto Top
-
+
-
+
Field Type Label Description
-
+
category_path
string
repeated
support gfile.Glob
-
+
dense_path
string
repeated
-
+
label_path
string
repeated
-
+
-
-
-
+
+
+
DatahubServer
-
+
Field Type Label Description
-
+
akId
string
required
-
+
akSecret
string
required
-
+
region
string
required
-
+
project
string
required
-
+
topic
string
required
-
+
shard_num
uint32
required
-
+
life_cycle
uint32
required
-
+
-
-
-
+
+
+
KafkaServer
-
+
Field Type Label Description
-
+
server
string
required
-
+
topic
string
required
-
+
group
string
required
-
+
partitions
uint32
required
-
+
offset
uint32
repeated
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dbmtl.proto Top
-
+
DBMTL
-
+
Field Type Label Description
-
+
bottom_dnn
DNN
optional
shared bottom dnn layer
-
+
expert_dnn
DNN
optional
mmoe expert dnn layer definition
-
+
num_expert
uint32
optional
number of mmoe experts Default: 0
-
+
task_towers
BayesTaskTower
repeated
bayes task tower
-
+
l2_regularization
float
optional
l2 regularization Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dcn.proto Top
-
+
CrossTower
-
+
Field Type Label Description
-
+
input
string
required
-
+
cross_num
uint32
required
The number of cross layers Default: 3
-
+
-
-
-
+
+
+
DCN
-
+
Field Type Label Description
-
+
deep_tower
Tower
required
-
+
cross_tower
CrossTower
required
-
+
final_dnn
DNN
required
-
+
l2_regularization
float
required
Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/deepfm.proto Top
-
+
DeepFM
-
+
Field Type Label Description
-
+
dnn
DNN
required
-
+
final_dnn
DNN
optional
-
+
wide_output_dim
uint32
optional
Default: 1
-
+
wide_regularization
float
optional
deprecated Default: 0.0001
-
+
dense_regularization
float
optional
deprecated Default: 0.0001
-
+
l2_regularization
float
optional
Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dlrm.proto Top
-
+
DLRM
-
+
Field Type Label Description
-
+
top_dnn
DNN
required
-
+
bot_dnn
DNN
required
-
+
arch_interaction_op
string
optional
options are: dot and cat Default: dot
-
+
arch_interaction_itself
bool
optional
whether a feature will interact with itself Default: false
-
+
arch_with_dense_feature
bool
optional
whether to include dense features after interaction Default: false
-
+
l2_regularization
float
optional
Default: 1e-05
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dnn.proto Top
-
+
DNN
-
+
Field Type Label Description
-
+
hidden_units
uint32
repeated
hidden units for each layer
-
+
dropout_ratio
float
repeated
ratio of dropout
-
+
activation
string
optional
activation function Default: tf.nn.relu
-
+
use_bn
bool
optional
use batch normalization Default: true
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dropoutnet.proto Top
-
+
DropoutNet
-
+
Field Type Label Description
-
+
user_content
DNN
required
-
+
user_preference
DNN
required
-
+
item_content
DNN
required
-
+
item_preference
DNN
required
-
+
user_tower
DNN
required
-
+
item_tower
DNN
required
-
+
l2_regularization
float
required
Default: 0
-
+
user_dropout_rate
float
required
Default: 0
-
+
item_dropout_rate
float
required
Default: 0.5
-
+
softmax_loss
SoftmaxCrossEntropyWithNegativeMining
optional
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/dssm.proto Top
-
+
DSSM
-
+
Field Type Label Description
-
+
user_tower
DSSMTower
required
-
+
item_tower
DSSMTower
required
-
+
l2_regularization
float
required
Default: 0.0001
-
+
simi_func
Similarity
optional
Default: COSINE
-
+
scale_simi
bool
optional
add a layer for scaling the similarity Default: true
-
+
item_id
string
optional
-
+
ignore_in_batch_neg_sam
bool
required
Default: false
-
+
-
-
-
+
+
+
DSSMTower
-
+
Field Type Label Description
-
+
id
string
required
-
+
dnn
DNN
required
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/eas_serving.proto Top
-
+
Config
-
+
Field Type Label Description
-
+
column_delim
string
@@ -2864,14 +2864,14 @@ Config
例如输入特征为"1005,109;0;93eaba74",此时分号分割的为column,
逗号分割的为每个column的多个feature, 下划线分割为feature名字和对应的value。
-
+
feature_delim
string
-
+
hash
string
@@ -2879,503 +2879,503 @@ Config
指定字符串hash分桶的算法,支持HarmHash(对应于tf.strings.to_hash_bucket_fast())
和SipHash(对应于tf.strings.to_hash_bucket_strong())两种字符串hash分桶算法
-
+
embeddings
Config.EmbeddingsEntry
repeated
embedding_name to embedding
-
+
embedding_max_norm
Config.EmbeddingMaxNormEntry
repeated
指定embedding lookup的结果的最大L2-norm
-
+
embedding_combiner
Config.EmbeddingCombinerEntry
repeated
指定embedding的combiner策略,支持sum, mean和sqrtn
-
+
model
Model
-
+
-
-
-
+
+
+
Config.EmbeddingCombinerEntry
-
+
Field Type Label Description
-
+
key
string
-
+
value
string
-
+
-
-
-
+
+
+
Config.EmbeddingMaxNormEntry
-
+
Field Type Label Description
-
+
key
string
-
+
value
float
-
+
-
-
-
+
+
+
Config.EmbeddingsEntry
-
+
-
-
-
+
+
+
Embedding
-
+
Field Type Label Description
-
+
partition_num
int32
指定该embedding切分的总数
-
+
parts
EmbeddingPart
repeated
-
+
-
-
-
+
+
+
EmbeddingPart
-
+
Field Type Label Description
-
+
embedding_part_path
string
指定EmbeddingPartData(*.pb)所在的路径
-
+
partition_id
int32
指定该embedding part所属第几个part
-
+
shape
int64
repeated
指定该embedding part的shape(可以从EmbeddingPartData中读取)
-
+
deploy_strategy
string
embedding part的部署策略, 支持本地部署(local)和远程部署(remote)
-
+
-
-
-
+
+
+
EmbeddingPartData
-
+
Field Type Label Description
-
+
shape
int64
repeated
Shape of the embedding
-
+
data
float
repeated
Data
-
+
-
-
-
+
+
+
Model
-
+
Field Type Label Description
-
+
model_path
string
指定模型所在路径,便于加载模型
-
+
model_signature_name
string
指定模型的sinature的名字
-
+
model_inputs
ModelInput
repeated
model input description
-
+
-
-
-
+
+
+
-
+
Field Type Label Description
-
+
feature_name
string
-
+
embedding_name
string
-
+
placeholder_name
string
-
+
weight_name
string
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/easy_rec_model.proto Top
-
+
DummyModel
for input performance test
-
-
-
+
+
+
EasyRecModel
-
+
Field Type Label Description
-
+
model_class
string
required
-
+
feature_groups
FeatureGroupConfig
repeated
actually input layers, each layer produce a group of feature
-
+
dummy
DummyModel
optional
-
+
wide_and_deep
WideAndDeep
optional
-
+
deepfm
DeepFM
optional
-
+
multi_tower
MultiTower
optional
-
+
fm
FM
optional
-
+
dcn
DCN
optional
-
+
autoint
AutoInt
optional
-
+
dlrm
DLRM
optional
-
+
dssm
DSSM
optional
-
+
mind
MIND
optional
-
+
dropoutnet
DropoutNet
optional
-
+
metric_learning
CoMetricLearningI2I
optional
-
+
mmoe
MMoE
optional
-
+
esmm
ESMM
optional
-
+
dbmtl
DBMTL
optional
-
+
simple_multi_task
SimpleMultiTask
optional
-
+
ple
PLE
optional
-
+
rocket_launching
RocketLaunching
optional
-
+
seq_att_groups
SeqAttGroupConfig
repeated
-
+
embedding_regularization
float
@@ -3384,35 +3384,35 @@ EasyRecModel
add regularization to all variables with "embedding_weights:"
in name Default: 0
-
+
loss_type
LossType
optional
Default: CLASSIFICATION
-
+
num_class
uint32
optional
Default: 1
-
+
use_embedding_variable
bool
optional
Default: false
-
+
kd
KD
repeated
-
+
restore_filters
string
@@ -3420,274 +3420,274 @@ EasyRecModel
filter variables matching any pattern in restore_filters
common filters are Adam, Momentum, etc.
-
+
variational_dropout
VariationalDropoutLayer
optional
-
+
losses
Loss
repeated
-
+
f1_reweight_loss
F1ReweighedLoss
optional
-
+
-
-
-
+
+
+
KD
for knowledge distillation
-
+
Field Type Label Description
-
+
loss_name
string
optional
-
+
pred_name
string
required
-
+
pred_is_logits
bool
optional
default to be logits Default: true
-
+
soft_label_name
string
required
for CROSS_ENTROPY_LOSS, soft_label must be logits instead of probs
-
+
label_is_logits
bool
optional
default to be logits Default: true
-
+
loss_type
LossType
required
currently only support CROSS_ENTROPY_LOSS and L2_LOSS
-
+
loss_weight
float
optional
Default: 1
-
+
temperature
float
optional
only for loss_type == CROSS_ENTROPY_LOSS Default: 1
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/esmm.proto Top
-
+
ESMM
-
+
Field Type Label Description
-
+
groups
Tower
repeated
-
+
ctr_tower
TaskTower
required
-
+
cvr_tower
TaskTower
required
-
+
l2_regularization
float
required
Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/eval.proto Top
-
+
AUC
-
+
Field Type Label Description
-
+
num_thresholds
uint32
optional
Default: 200
-
+
-
-
-
+
+
+
Accuracy
-
-
-
+
+
+
AvgPrecisionAtTopK
-
+
Field Type Label Description
-
+
topk
uint32
optional
Default: 5
-
+
-
-
-
+
+
+
EvalConfig
Message for configuring EasyRecModel evaluation jobs (eval.py).
-
+
Field Type Label Description
-
+
num_examples
uint32
optional
Number of examples to process of evaluation. Default: 0
-
+
eval_interval_secs
uint32
optional
How often to run evaluation. Default: 300
-
+
max_evals
uint32
optional
Maximum number of times to run evaluation. If set to 0, will run forever. Default: 0
-
+
save_graph
bool
optional
Whether the TensorFlow graph used for evaluation should be saved to disk. Default: false
-
+
metrics_set
EvalMetrics
@@ -3695,139 +3695,139 @@ EvalConfig
Type of metrics to use for evaluation.
possible values:
-
+
eval_online
bool
optional
Evaluation online with batch forward data of training Default: false
-
+
-
-
-
+
+
+
EvalMetrics
-
+
-
-
-
+
+
+
GAUC
-
+
Field Type Label Description
-
+
uid_field
string
required
uid field name
-
+
reduction
string
@@ -3837,97 +3837,97 @@ GAUC
* "mean_by_sample_num": weighted mean with sample num of different users
* "mean_by_positive_num": weighted mean with positive sample num of different users Default: mean
-
+
-
-
-
+
+
+
Max_F1
-
-
-
+
+
+
MeanAbsoluteError
-
-
-
+
+
+
MeanSquaredError
-
-
-
+
+
+
Precision
-
-
-
+
+
+
Recall
-
-
-
+
+
+
RecallAtTopK
-
+
Field Type Label Description
-
+
topk
uint32
optional
Default: 5
-
+
-
-
-
+
+
+
RootMeanSquaredError
-
-
-
+
+
+
SessionAUC
-
+
Field Type Label Description
-
+
session_id_field
string
required
session id field name
-
+
reduction
string
@@ -3937,38 +3937,38 @@ SessionAUC
* "mean_by_sample_num": weighted mean with sample num of different sessions
* "mean_by_positive_num": weighted mean with positive sample num of different sessions Default: mean
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/export.proto Top
-
+
ExportConfig
Message for configuring exporting models.
-
+
Field Type Label Description
-
+
batch_size
int32
@@ -3977,7 +3977,7 @@ ExportConfig
which is only supported by classification model right now, while
other models support static batch_size Default: -1
-
+
exporter_type
string
@@ -3988,28 +3988,28 @@ ExportConfig
latest: export the best model according to best_exporter_metric
none: do not perform export Default: final
-
+
best_exporter_metric
string
optional
the metric used to determine the best checkpoint Default: auc
-
+
metric_bigger
bool
optional
metric value the bigger the best Default: true
-
+
enable_early_stop
bool
optional
enable early stop Default: false
-
+
early_stop_func
string
@@ -4018,258 +4018,258 @@ ExportConfig
early_stop_func(eval_results, early_stop_params)
return True if should stop
-
+
early_stop_params
string
optional
custom early stop parameters
-
+
max_check_steps
int32
optional
early stop max check steps Default: 10000
-
+
multi_placeholder
bool
optional
each feature has a placeholder Default: true
-
+
exports_to_keep
int32
optional
export to keep, only for exporter_type in [best, latest] Default: 1
-
+
multi_value_fields
MultiValueFields
optional
multi value field list
-
+
placeholder_named_by_input
bool
optional
is placeholder named by input Default: false
-
+
filter_inputs
bool
optional
filter out inputs, only keep effective ones Default: true
-
+
export_features
bool
optional
export the original feature values as string Default: false
-
+
export_rtp_outputs
bool
optional
export the outputs required by RTP Default: false
-
+
-
-
-
+
+
+
MultiValueFields
-
+
Field Type Label Description
-
+
input_name
string
repeated
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/feature_config.proto Top
-
+
AttentionCombiner
-
-
-
+
+
+
FeatureConfig
-
+
Field Type Label Description
-
+
feature_name
string
optional
-
+
input_names
string
repeated
input field names: must be included in DatasetConfig.input_fields
-
+
feature_type
FeatureConfig.FeatureType
required
Default: IdFeature
-
+
embedding_name
string
optional
-
+
embedding_dim
uint32
optional
Default: 0
-
+
hash_bucket_size
uint64
optional
Default: 0
-
+
num_buckets
uint64
optional
for categorical_column_with_identity Default: 0
-
+
boundaries
double
repeated
only for raw features
-
+
separator
string
optional
separator with in features Default: |
-
+
kv_separator
string
optional
delimeter to separator key from value
-
+
seq_multi_sep
string
optional
delimeter to separate sequence multi-values
-
+
vocab_file
string
optional
-
+
vocab_list
string
repeated
-
+
shared_names
string
repeated
many other field share this config
-
+
lookup_max_sel_elem_num
int32
optional
lookup max select element number, default 10 Default: 10
-
+
max_partitions
int32
optional
max_partitions Default: 1
-
+
combiner
string
optional
combiner Default: mean
-
+
initializer
Initializer
optional
embedding initializer
-
+
precision
int32
@@ -4278,293 +4278,293 @@ FeatureConfig
scientific format is not used.
in default it is not allowed to convert float/double to string Default: -1
-
+
min_val
double
optional
normalize raw feature to [0-1] Default: 0
-
+
max_val
double
optional
Default: 0
-
+
raw_input_dim
uint32
optional
raw feature of multiple dimensions Default: 1
-
+
sequence_combiner
SequenceCombiner
optional
sequence feature combiner
-
+
sub_feature_type
FeatureConfig.FeatureType
optional
sub feature type for sequence feature Default: IdFeature
-
+
sequence_length
uint32
optional
sequence length Default: 1
-
+
expression
string
optional
for expr feature
-
+
-
-
-
+
+
+
FeatureConfigV2
-
+
Field Type Label Description
-
+
features
FeatureConfig
repeated
-
+
-
-
-
+
+
+
FeatureGroupConfig
-
+
-
-
-
+
+
+
MultiHeadAttentionCombiner
-
-
-
+
+
+
SeqAttGroupConfig
-
+
Field Type Label Description
-
+
group_name
string
optional
-
+
seq_att_map
SeqAttMap
repeated
-
+
tf_summary
bool
optional
Default: false
-
+
seq_dnn
DNN
optional
-
+
allow_key_search
bool
optional
Default: false
-
+
-
-
-
+
+
+
SeqAttMap
-
+
Field Type Label Description
-
+
key
string
repeated
-
+
hist_seq
string
repeated
-
+
-
-
-
+
+
+
SequenceCombiner
-
+
-
-
-
+
+
+
TextCnnCombiner
-
+
Field Type Label Description
-
+
filter_sizes
uint32
repeated
-
+
num_filters
uint32
repeated
-
+
-
-
-
-
+
+
+
+
FeatureConfig.FeatureType
@@ -4572,52 +4572,52 @@ FeatureConfig.FeatureType
Name Number Description
-
+
IdFeature
0
-
+
RawFeature
1
-
+
TagFeature
2
-
+
ComboFeature
3
-
+
LookupFeature
4
-
+
SequenceFeature
5
-
+
ExprFeature
6
-
+
-
+
FeatureConfig.FieldType
@@ -4625,46 +4625,46 @@ FeatureConfig.FieldType
Name Number Description
-
+
INT32
0
-
+
INT64
1
-
+
STRING
2
-
+
FLOAT
4
-
+
DOUBLE
5
-
+
BOOL
6
-
+
-
+
WideOrDeep
@@ -4672,664 +4672,664 @@ WideOrDeep
Name Number Description
-
+
DEEP
0
-
+
WIDE
1
-
+
WIDE_AND_DEEP
2
-
+
-
-
-
-
-
+
+
+
+
+
easy_rec/python/protos/fm.proto Top
-
+
FM
-
+
Field Type Label Description
-
+
l2_regularization
float
optional
Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/hive_config.proto Top
-
+
HiveConfig
-
+
Field Type Label Description
-
+
host
string
required
hive master's ip
-
+
port
uint32
required
hive port Default: 10000
-
+
username
string
required
hive username
-
+
database
string
required
hive database Default: default
-
+
table_name
string
required
-
+
hash_fields
string
required
-
+
limit_num
uint32
optional
Default: 0
-
+
fetch_size
uint32
required
Default: 512
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/hyperparams.proto Top
-
+
ConstantInitializer
-
+
Field Type Label Description
-
+
consts
float
repeated
-
+
-
-
-
+
+
+
GlorotNormalInitializer
-
-
-
+
+
+
Initializer
Proto with one-of field for initializers.
-
+
-
-
-
+
+
+
L1L2Regularizer
Configuration proto for L2 Regularizer.
-
+
Field Type Label Description
-
+
scale_l1
float
optional
Default: 1
-
+
scale_l2
float
optional
Default: 1
-
+
-
-
-
+
+
+
L1Regularizer
Configuration proto for L1 Regularizer.
-
+
Field Type Label Description
-
+
scale
float
optional
Default: 1
-
+
-
-
-
+
+
+
L2Regularizer
Configuration proto for L2 Regularizer.
-
+
Field Type Label Description
-
+
scale
float
optional
Default: 1
-
+
-
-
-
+
+
+
RandomNormalInitializer
Configuration proto for random normal initializer. See https://www.tensorflow.org/api_docs/python/tf/random_normal_initializer
-
+
Field Type Label Description
-
+
mean
float
optional
Default: 0
-
+
stddev
float
optional
Default: 1
-
+
-
-
-
+
+
+
Regularizer
Proto with one-of field for regularizers.
-
+
-
-
-
+
+
+
TruncatedNormalInitializer
Configuration proto for truncated normal initializer. See https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
-
+
Field Type Label Description
-
+
mean
float
optional
Default: 0
-
+
stddev
float
optional
Default: 1
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/layer.proto Top
-
+
HighWayTower
-
+
Field Type Label Description
-
+
input
string
required
-
+
emb_size
uint32
required
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/loss.proto Top
-
+
CircleLoss
-
+
Field Type Label Description
-
+
margin
float
required
Default: 0.25
-
+
gamma
float
required
Default: 32
-
+
-
-
-
+
+
+
F1ReweighedLoss
-
+
Field Type Label Description
-
+
f1_beta_square
float
required
Default: 1
-
+
label_smoothing
float
required
Default: 0
-
+
-
-
-
+
+
+
Loss
-
+
Field Type Label Description
-
+
loss_type
LossType
required
-
+
weight
float
required
Default: 1
-
+
-
-
-
+
+
+
MultiSimilarityLoss
-
+
Field Type Label Description
-
+
alpha
float
required
Default: 2
-
+
beta
float
required
Default: 50
-
+
lamb
float
required
Default: 1
-
+
eps
float
required
Default: 0.1
-
+
-
-
-
+
+
+
SoftmaxCrossEntropyWithNegativeMining
-
+
Field Type Label Description
-
+
num_negative_samples
uint32
required
-
+
margin
float
required
Default: 0
-
+
gamma
float
required
Default: 1
-
+
coefficient_of_support_vector
float
required
Default: 1
-
+
-
-
-
-
+
+
+
+
LossType
@@ -5337,148 +5337,148 @@ LossType
Name Number Description
-
+
CLASSIFICATION
0
-
+
L2_LOSS
1
-
+
SIGMOID_L2_LOSS
2
-
+
CROSS_ENTROPY_LOSS
3
crossentropy loss/log loss
-
+
SOFTMAX_CROSS_ENTROPY
4
-
+
CIRCLE_LOSS
5
-
+
MULTI_SIMILARITY_LOSS
6
-
+
SOFTMAX_CROSS_ENTROPY_WITH_NEGATIVE_MINING
7
-
+
PAIR_WISE_LOSS
8
-
+
F1_REWEIGHTED_LOSS
9
-
+
-
-
-
-
-
+
+
+
+
+
easy_rec/python/protos/mind.proto Top
-
+
Capsule
-
+
Field Type Label Description
-
+
max_k
uint32
optional
max number of high capsules Default: 5
-
+
max_seq_len
uint32
required
max behaviour sequence length
-
+
high_dim
uint32
required
high capsule embedding vector dimension
-
+
num_iters
uint32
optional
number EM iterations Default: 3
-
+
routing_logits_scale
float
optional
routing logits scale Default: 20
-
+
routing_logits_stddev
float
optional
routing logits initial stddev Default: 1
-
+
squash_pow
float
optional
squash power Default: 1
-
+
scale_ratio
float
optional
output ratio Default: 1
-
+
const_caps_num
bool
@@ -5486,45 +5486,45 @@ Capsule
constant interest number
in default, use log(seq_len) Default: false
-
+
-
-
-
+
+
+
MIND
-
+
Field Type Label Description
-
+
pre_capsule_dnn
DNN
optional
preprocessing dnn before entering capsule layer
-
+
user_dnn
DNN
required
dnn layers applied on user_context(none sequence features)
-
+
concat_dnn
DNN
required
concat user and capsule dnn
-
+
user_seq_combine
MIND.UserSeqCombineMethod
@@ -5532,21 +5532,21 @@ MIND
method to combine several user sequences
such as item_ids, category_ids Default: SUM
-
+
item_dnn
DNN
required
dnn layers applied on item features
-
+
capsule_config
Capsule
required
-
+
simi_pow
float
@@ -5554,65 +5554,65 @@ MIND
similarity power, the paper says that the big
the better Default: 10
-
+
simi_func
Similarity
optional
Default: COSINE
-
+
scale_simi
bool
optional
add a layer for scaling the similarity Default: true
-
+
l2_regularization
float
required
Default: 0.0001
-
+
time_id_fea
string
optional
-
+
item_id
string
optional
-
+
ignore_in_batch_neg_sam
bool
optional
Default: false
-
+
max_interests_simi
float
optional
Default: 1
-
+
-
-
-
-
+
+
+
+
MIND.UserSeqCombineMethod
@@ -5620,740 +5620,740 @@ MIND.UserSeqCombineMethod
Name Number Description
-
+
CONCAT
0
-
+
SUM
1
-
+
-
-
-
-
-
+
+
+
+
+
easy_rec/python/protos/mmoe.proto Top
-
+
ExpertTower
-
+
Field Type Label Description
-
+
expert_name
string
required
-
+
dnn
DNN
required
-
+
-
-
-
+
+
+
MMoE
-
+
Field Type Label Description
-
+
experts
ExpertTower
repeated
deprecated: original mmoe experts config
-
+
expert_dnn
DNN
optional
mmoe expert dnn layer definition
-
+
num_expert
uint32
optional
number of mmoe experts Default: 0
-
+
task_towers
TaskTower
repeated
task tower
-
+
l2_regularization
float
required
l2 regularization Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/multi_tower.proto Top
-
+
BSTTower
-
+
Field Type Label Description
-
+
input
string
required
-
+
seq_len
uint32
required
Default: 5
-
+
multi_head_size
uint32
required
Default: 4
-
+
-
-
-
+
+
+
DINTower
-
+
Field Type Label Description
-
+
input
string
required
-
+
dnn
DNN
required
-
+
-
-
-
+
+
+
MultiTower
-
+
Field Type Label Description
-
+
towers
Tower
repeated
-
+
final_dnn
DNN
required
-
+
l2_regularization
float
required
Default: 0.0001
-
+
din_towers
DINTower
repeated
-
+
bst_towers
BSTTower
repeated
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/optimizer.proto Top
-
+
AdagradOptimizer
Configuration message for the AdagradOptimizer See: https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
-
-
-
+
+
+
AdamAsyncOptimizer
Only available on pai-tf, which has better performance than AdamOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
beta1
float
optional
Default: 0.9
-
+
beta2
float
optional
Default: 0.999
-
+
-
-
-
+
+
+
AdamAsyncWOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
weight_decay
float
optional
Default: 1e-06
-
+
beta1
float
optional
Default: 0.9
-
+
beta2
float
optional
Default: 0.999
-
+
-
-
-
+
+
+
AdamOptimizer
Configuration message for the AdamOptimizer See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
beta1
float
optional
Default: 0.9
-
+
beta2
float
optional
Default: 0.999
-
+
-
-
-
+
+
+
AdamWOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
weight_decay
float
optional
Default: 1e-06
-
+
beta1
float
optional
Default: 0.9
-
+
beta2
float
optional
Default: 0.999
-
+
-
-
-
+
+
+
ConstantLearningRate
Configuration message for a constant learning rate.
-
+
Field Type Label Description
-
+
learning_rate
float
optional
Default: 0.002
-
+
-
-
-
+
+
+
CosineDecayLearningRate
Configuration message for a cosine decaying learning rate as defined in utils/learning_schedules.py
-
+
Field Type Label Description
-
+
learning_rate_base
float
optional
Default: 0.002
-
+
total_steps
uint32
optional
Default: 4000000
-
+
warmup_learning_rate
float
optional
Default: 0.0002
-
+
warmup_steps
uint32
optional
Default: 10000
-
+
hold_base_rate_steps
uint32
optional
Default: 0
-
+
-
-
-
+
+
+
ExponentialDecayLearningRate
Configuration message for an exponentially decaying learning rate. See https://www.tensorflow.org/versions/master/api_docs/python/train/ \ decaying_the_learning_rate#exponential_decay
-
+
Field Type Label Description
-
+
initial_learning_rate
float
optional
Default: 0.002
-
+
decay_steps
uint32
optional
Default: 4000000
-
+
decay_factor
float
optional
Default: 0.95
-
+
staircase
bool
optional
Default: true
-
+
burnin_learning_rate
float
optional
Default: 0
-
+
burnin_steps
uint32
optional
Default: 0
-
+
min_learning_rate
float
optional
Default: 0
-
+
-
-
-
+
+
+
FtrlOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
optional float learning_rate = 1 [default=1e-4];
-
+
learning_rate_power
float
optional
Default: -0.5
-
+
initial_accumulator_value
float
optional
Default: 0.1
-
+
l1_reg
float
optional
Default: 0
-
+
l2_reg
float
optional
Default: 0
-
+
l2_shrinkage_reg
float
optional
Default: 0
-
+
-
-
-
+
+
+
LearningRate
Configuration message for optimizer learning rate.
-
+
-
-
-
+
+
+
ManualStepLearningRate
Configuration message for a manually defined learning rate schedule.
-
+
Field Type Label Description
-
+
initial_learning_rate
float
optional
Default: 0.002
-
+
schedule
ManualStepLearningRate.LearningRateSchedule
repeated
-
+
warmup
bool
@@ -6361,566 +6361,566 @@ ManualStepLearningRate
Whether to linearly interpolate learning rates for steps in
[0, schedule[0].step]. Default: false
-
+
-
-
-
+
+
+
ManualStepLearningRate.LearningRateSchedule
-
+
Field Type Label Description
-
+
step
uint32
optional
-
+
learning_rate
float
optional
Default: 0.002
-
+
-
-
-
+
+
+
MomentumOptimizer
Configuration message for the MomentumOptimizer See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
momentum_optimizer_value
float
optional
Default: 0.9
-
+
-
-
-
+
+
+
MomentumWOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
weight_decay
float
optional
Default: 1e-06
-
+
momentum_optimizer_value
float
optional
Default: 0.9
-
+
-
-
-
+
+
+
Optimizer
Top level optimizer message.
-
+
-
-
-
+
+
+
PolyDecayLearningRate
Configuration message for a poly decaying learning rate. See https://www.tensorflow.org/api_docs/python/tf/train/polynomial_decay.
-
+
Field Type Label Description
-
+
learning_rate_base
float
required
-
+
total_steps
int64
required
-
+
power
float
required
-
+
end_learning_rate
float
optional
Default: 0
-
+
-
-
-
+
+
+
RMSPropOptimizer
Configuration message for the RMSPropOptimizer See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
-
+
Field Type Label Description
-
+
learning_rate
LearningRate
optional
-
+
momentum_optimizer_value
float
optional
Default: 0.9
-
+
decay
float
optional
Default: 0.9
-
+
epsilon
float
optional
Default: 1
-
+
-
-
-
+
+
+
-
+
Field Type Label Description
-
+
learning_rate_base
float
required
-
+
hidden_size
int32
required
-
+
warmup_steps
int32
required
-
+
step_scaling_rate
float
optional
Default: 1
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/pdn.proto Top
-
-
-
-
-
-
+
+
+
+
+
+
easy_rec/python/protos/pipeline.proto Top
-
+
EasyRecConfig
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/ple.proto Top
-
+
-
+
Field Type Label Description
-
+
network_name
string
required
-
+
expert_num_per_task
uint32
required
number of experts per task
-
+
share_num
uint32
@@ -6928,14 +6928,14 @@
number of experts for share
For the last extraction_network, no need to configure this
-
+
task_expert_net
DNN
required
dnn network of experts per task
-
+
share_expert_net
DNN
@@ -6943,141 +6943,141 @@
dnn network of experts for share
For the last extraction_network, no need to configure this
-
+
-
-
-
+
+
+
PLE
-
+
Field Type Label Description
-
+
extraction_networks
ExtractionNetwork
repeated
extraction network
-
+
task_towers
TaskTower
repeated
task tower
-
+
l2_regularization
float
optional
l2 regularization Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/rocket_launching.proto Top
-
+
RocketLaunching
-
+
Field Type Label Description
-
+
share_dnn
DNN
required
-
+
booster_dnn
DNN
required
-
+
light_dnn
DNN
required
-
+
l2_regularization
float
optional
Default: 0.0001
-
+
feature_based_distillation
bool
optional
Default: false
-
+
feature_distillation_function
Similarity
optional
COSINE = 0; EUCLID = 1; Default: COSINE
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/simi.proto Top
-
-
+
+
Similarity
@@ -7085,172 +7085,172 @@ Similarity
Name Number Description
-
+
COSINE
0
-
+
INNER_PRODUCT
1
-
+
EUCLID
2
-
+
-
-
-
-
-
+
+
+
+
+
easy_rec/python/protos/simple_multi_task.proto Top
-
+
SimpleMultiTask
-
+
Field Type Label Description
-
+
task_towers
TaskTower
repeated
-
+
l2_regularization
float
required
Default: 0.0001
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/tower.proto Top
-
+
BayesTaskTower
-
+
Field Type Label Description
-
+
tower_name
string
required
task name for the task tower
-
+
label_name
string
optional
label for the task, default is label_fields by order
-
+
metrics_set
EvalMetrics
repeated
metrics for the task
-
+
loss_type
LossType
optional
loss for the task Default: CLASSIFICATION
-
+
num_class
uint32
optional
num_class for multi-class classification loss Default: 1
-
+
dnn
DNN
optional
task specific dnn
-
+
relation_tower_names
string
repeated
related tower names
-
+
relation_dnn
DNN
optional
relation dnn
-
+
weight
float
optional
training loss weights Default: 1
-
+
task_space_indicator_label
string
optional
label name for indcating the sample space for the task tower
-
+
in_task_space_weight
float
optional
the loss weight for sample in the task space Default: 1
-
+
out_task_space_weight
float
@@ -7262,170 +7262,170 @@ BayesTaskTower
prediction weights
optional float prediction_weight = 14 [default = 1.0]; Default: 1
-
+
-
-
-
+
+
+
TaskTower
-
+
Field Type Label Description
-
+
tower_name
string
required
task name for the task tower
-
+
label_name
string
optional
label for the task, default is label_fields by order
-
+
metrics_set
EvalMetrics
repeated
metrics for the task
-
+
loss_type
LossType
optional
loss for the task Default: CLASSIFICATION
-
+
num_class
uint32
optional
num_class for multi-class classification loss Default: 1
-
+
dnn
DNN
optional
task specific dnn
-
+
weight
float
optional
training loss weights Default: 1
-
+
task_space_indicator_label
string
optional
label name for indcating the sample space for the task tower
-
+
in_task_space_weight
float
optional
the loss weight for sample in the task space Default: 1
-
+
out_task_space_weight
float
optional
the loss weight for sample out the task space Default: 1
-
+
-
-
-
+
+
+
Tower
-
+
Field Type Label Description
-
+
input
string
required
-
+
dnn
DNN
required
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/train.proto Top
-
+
TrainConfig
Message for configuring EasyRecModel training jobs (train.py). Next id: 25
-
+
Field Type Label Description
-
+
optimizer_config
Optimizer
repeated
optimizer options
-
+
gradient_clipping_by_norm
float
optional
If greater than 0, clips gradients by this value. Default: 0
-
+
num_steps
uint32
@@ -7433,21 +7433,21 @@ TrainConfig
Number of steps to train the models: if 0, will train the model
indefinitely. Default: 0
-
+
fine_tune_checkpoint
string
optional
Checkpoint to restore variables from.
-
+
fine_tune_ckpt_var_map
string
optional
-
+
sync_replicas
bool
@@ -7455,7 +7455,7 @@ TrainConfig
Whether to synchronize replicas during training.
In case so, build a SyncReplicateOptimizer Default: true
-
+
sparse_accumulator_type
string
@@ -7465,7 +7465,7 @@ TrainConfig
raw, hash, multi_map, list, parallel
in general, multi_map runs faster than other options. Default: multi_map
-
+
startup_delay_steps
float
@@ -7473,56 +7473,56 @@ TrainConfig
Number of training steps between replica startup.
This flag must be set to 0 if sync_replicas is set to true. Default: 15
-
+
save_checkpoints_steps
uint32
optional
Step interval for saving checkpoint Default: 1000
-
+
save_checkpoints_secs
uint32
optional
Seconds interval for saving checkpoint
-
+
keep_checkpoint_max
uint32
optional
Max checkpoints to keep Default: 10
-
+
save_summary_steps
uint32
optional
Save summaries every this many steps. Default: 1000
-
+
log_step_count_steps
uint32
optional
The frequency global step/sec and the loss will be logged during training. Default: 10
-
+
is_profiling
bool
optional
profiling or not Default: false
-
+
force_restore_shape_compatible
bool
optional
if variable shape is incompatible, clip or pad variables in checkpoint Default: false
-
+
train_distribute
DistributionStrategy
@@ -7531,21 +7531,21 @@ TrainConfig
- mirrored: MirroredStrategy, single machine and multiple devices;
- collective: CollectiveAllReduceStrategy, multiple machines and multiple devices. Default: NoStrategy
-
+
num_gpus_per_worker
int32
optional
Number of gpus per machine Default: 1
-
+
summary_model_vars
bool
optional
summary model variables or not Default: false
-
+
protocol
string
@@ -7554,51 +7554,51 @@ TrainConfig
grpc++: https://help.aliyun.com/document_detail/173157.html?spm=5176.10695662.1996646101.searchclickresult.3ebf450evuaPT3
star_server: https://help.aliyun.com/document_detail/173154.html?spm=a2c4g.11186623.6.627.39ad7e3342KOX4
-
+
inter_op_parallelism_threads
int32
optional
inter_op_parallelism_threads Default: 0
-
+
intra_op_parallelism_threads
int32
optional
intra_op_parallelism_threads Default: 0
-
+
tensor_fuse
bool
optional
tensor fusion on PAI-TF Default: false
-
+
write_graph
bool
optional
write graph into graph.pbtxt and summary or not Default: true
-
+
freeze_gradient
string
repeated
match variable patterns to freeze
-
+
-
-
-
-
+
+
+
+
DistributionStrategy
@@ -7606,130 +7606,130 @@ DistributionStrategy
Name Number Description
-
+
NoStrategy
0
use old SyncReplicasOptimizer for ParameterServer training
-
+
PSStrategy
1
PSStrategy with multiple gpus on one node could not work
on pai-tf, could only work on TF >=1.15
-
+
MirroredStrategy
2
could only work on PaiTF or TF >=1.15
single worker multiple gpu mode
-
+
CollectiveAllReduceStrategy
3
Depreciated
-
+
ExascaleStrategy
4
currently not working good
-
+
MultiWorkerMirroredStrategy
5
multi worker multi gpu mode
see tf.distribute.experimental.MultiWorkerMirroredStrategy
-
+
-
-
-
-
-
+
+
+
+
+
easy_rec/python/protos/variational_dropout.proto Top
-
+
VariationalDropoutLayer
-
+
Field Type Label Description
-
+
regularization_lambda
float
optional
regularization coefficient lambda Default: 0.01
-
+
embedding_wise_variational_dropout
bool
optional
variational_dropout dimension Default: false
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
easy_rec/python/protos/wide_and_deep.proto Top
-
+
WideAndDeep
-
+
Field Type Label Description
-
+
wide_output_dim
uint32
required
Default: 1
-
+
dnn
DNN
required
-
+
final_dnn
DNN
@@ -7737,28 +7737,28 @@ WideAndDeep
if set, the output of dnn and wide part are concatenated and
passed to the final_dnn; otherwise, they are summarized
-
+
l2_regularization
float
optional
Default: 0.0001
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
Scalar Value Types
@@ -7766,7 +7766,7 @@ Scalar Value Types
.proto Type Notes C++ Type Java Type Python Type
-
+
double
@@ -7774,7 +7774,7 @@ Scalar Value Types
double
float
-
+
float
@@ -7782,7 +7782,7 @@ Scalar Value Types
float
float
-
+
int32
Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint32 instead.
@@ -7790,7 +7790,7 @@ Scalar Value Types
int
int
-
+
int64
Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint64 instead.
@@ -7798,7 +7798,7 @@ Scalar Value Types
long
int/long
-
+
uint32
Uses variable-length encoding.
@@ -7806,7 +7806,7 @@ Scalar Value Types
int
int/long
-
+
uint64
Uses variable-length encoding.
@@ -7814,7 +7814,7 @@ Scalar Value Types
long
int/long
-
+
sint32
Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int32s.
@@ -7822,7 +7822,7 @@ Scalar Value Types
int
int
-
+
sint64
Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int64s.
@@ -7830,7 +7830,7 @@ Scalar Value Types
long
int/long
-
+
fixed32
Always four bytes. More efficient than uint32 if values are often greater than 2^28.
@@ -7838,7 +7838,7 @@ Scalar Value Types
int
int
-
+
fixed64
Always eight bytes. More efficient than uint64 if values are often greater than 2^56.
@@ -7846,7 +7846,7 @@ Scalar Value Types
long
int/long
-
+
sfixed32
Always four bytes.
@@ -7854,7 +7854,7 @@ Scalar Value Types
int
int
-
+
sfixed64
Always eight bytes.
@@ -7862,7 +7862,7 @@ Scalar Value Types
long
int/long
-
+
bool
@@ -7870,7 +7870,7 @@ Scalar Value Types
boolean
boolean
-
+
string
A string must always contain UTF-8 encoded or 7-bit ASCII text.
@@ -7878,7 +7878,7 @@ Scalar Value Types
String
str/unicode
-
+
bytes
May contain any arbitrary sequence of bytes.
@@ -7886,9 +7886,8 @@ Scalar Value Types
ByteString
str
-
+