forked from Applied-Machine-Learning-Lab/LinRec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ml-1m.yaml
50 lines (44 loc) · 3.58 KB
/
ml-1m.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Atomic File Format
# (str) Delimiter between columns in atomic files (a tab character).
field_separator: "\t"
# (str) Delimiter between elements inside a sequence feature (a single space).
seq_separator: ' '
# Basic Information
# (str) Field name of the user ID feature.
USER_ID_FIELD: user_id
# (str) Field name of the item ID feature.
ITEM_ID_FIELD: item_id
# (str) Field name of the rating feature.
RATING_FIELD: rating
# (str) Field name of the timestamp feature.
TIME_FIELD: timestamp
# (dict) Sequence-feature field name -> maximum length of each sequence.
seq_len: null
# (str) Expected field name of the labels generated for point-wise dataLoaders.
LABEL_FIELD: label
# (dict) Pairs according to which 0/1 labels will be generated.
threshold: null
# (str) Prefix marking negative samples for pair-wise dataLoaders.
NEG_PREFIX: neg_
# Sequential Model Needed
# (str) Field name of the feature holding each item sequence's length.
ITEM_LIST_LENGTH_FIELD: item_length
# (str) Suffix appended to field names that are generated as sequences.
LIST_SUFFIX: _list
# (int) Upper bound on the length of each generated sequence.
MAX_ITEM_LIST_LENGTH: 200
# (str) Field name of the generated position sequence.
POSITION_FIELD: position_id
# Knowledge-based Model Needed
# (str) Field name of the head entity ID feature.
HEAD_ENTITY_ID_FIELD: head_id
# (str) Field name of the tail entity ID feature.
TAIL_ENTITY_ID_FIELD: tail_id
# (str) Field name of the relation ID feature.
RELATION_ID_FIELD: relation_id
# (str) Field name of the entity ID.
ENTITY_ID_FIELD: entity_id
# (bool) Whether to reverse relations of triples for bidirectional edges.
kg_reverse_r: false
# (str) Entity interval for filtering kg, e.g. [A,B] / [A,B) / (A,B) / (A,B].
entity_kg_num_interval: null
# (str) Relation interval for filtering kg, e.g. [A,B] / [A,B) / (A,B) / (A,B].
relation_kg_num_interval: null
# Selectively Loading
# (dict) Atomic-file suffix -> (list) field names to be loaded.
# The suffix entry must be indented beneath `load_col`; written at column 0 it
# would parse as a separate top-level key and leave `load_col` itself null.
load_col:
  inter: [user_id, item_id, rating, timestamp]
# (dict) Atomic-file suffix -> (list) field names NOT to be loaded.
unload_col: null
# (dict) Atomic-file suffix -> (list) field names which are loaded but not used.
unused_col: null
# Filtering
# (str) Whether to remove duplicated user-item interactions.
rm_dup_inter: null
# (dict) Filter inter by values, given as {value field (str): interval (str)}.
val_interval: null
# (bool) Whether or not to filter inter by user or item.
filter_inter_by_user_or_item: true
# (str) User interval for filtering inter, e.g. [A,B] / [A,B) / (A,B) / (A,B].
user_inter_num_interval: null
# (str) Item interval for filtering inter, e.g. [A,B] / [A,B) / (A,B) / (A,B].
item_inter_num_interval: null
# Preprocessing
# (list) Field names remapped into the same index system as USER_ID_FIELD.
alias_of_user_id: null
# (list) Field names remapped into the same index system as ITEM_ID_FIELD.
alias_of_item_id: null
# (list) Field names remapped into the same index system as ENTITY_ID_FIELD.
alias_of_entity_id: null
# (list) Field names remapped into the same index system as RELATION_ID_FIELD.
alias_of_relation_id: null
# (dict) Preloaded weight, given as {IDs (token): pretrained vectors (float-like)}.
preload_weight: null
# (list) Field names to be normalized.
normalize_field: null
# (bool) Whether or not to normalize all the float-like fields.
normalize_all: true