This project is a collection of recommendation-system components, including ranking and matching models and metrics, all implemented in TensorFlow 2.x.
You can use these models with model.fit() and model.predict() through the tf.keras.Model API.
The implementation for TensorFlow 1.x is in this GitHub repository.
A "......" row in the tables below means that the list will be continuously updated.
model | paper | blog | implemented |
---|---|---|---|
...... | |||
STEM | [KDD 2024] Ads Recommendation in a Collapsed and Entangled World | zhihu | ✅ |
PEPNet | [KDD 2023] PEPNet: Parameter and Embedding Personalized Network for Infusing with Personalized Prior Information | zhihu | ✅ |
M2M | [CIKM 2022] Leaving No One Behind: A Multi-Scenario Multi-Task Meta Learning Approach for Advertiser Modeling | zhihu | ✅ |
SAR-Net | [CIKM 2021] SAR-Net: A Scenario-Aware Ranking Network for Personalized Fair Recommendation in Hundreds of Travel Scenarios | zhihu | |
Star | [CIKM 2021] One Model to Serve All: Star Topology Adaptive Recommender for Multi-Domain CTR Prediction | zhihu | ✅ |
PLE | [RecSys 2020] Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations | zhihu | ✅ |
MMoE | [KDD 2018] Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts | zhihu | ✅ |
model | paper | blog | implemented |
---|---|---|---|
...... | |||
Dual Augmented Two-tower Model | [DLP-KDD 2021] A Dual Augmented Two-tower Model for Online Large-scale Recommendation | zhihu | |
ComiRec | [KDD 2020] Controllable Multi-Interest Framework for Recommendation | zhihu | |
MIND | [CIKM 2019] Multi-Interest Network with Dynamic Routing for Recommendation at Tmall | zhihu | |
Youtube DNN | [RecSys 2016] Deep Neural Networks for YouTube Recommendations | zhihu | |
Metrics for recommendation systems.
They are coming soon.
import numpy as np
import tensorflow as tf
from recsys.multidomain.pepnet import pepnet, Field, Task
# One Task per prediction target; the task names are also used as the keys of
# the label dict built in create_dataset().
task_list = [Task(name=task_name) for task_name in ('click', 'like', 'fav')]

# Number of domains: used as the vocabulary size of the `domain_id` field and
# as the exclusive upper bound of the randomly sampled domain ids.
num_domain = 3
def create_model():
    """Build the PEPNet example model and print its layer summary.

    The model is created from six input fields (user, item, item history,
    context and two domain fields), the module-level ``task_list`` and the
    layer sizes ``[64, 32]``, using attention to aggregate the history field.

    Returns:
        The model object returned by ``pepnet``.
    """
    fields = [
        Field('uid', vocabulary_size=100),
        Field('item_id', vocabulary_size=20, belong='item'),
        # NOTE(review): emb='item_id' presumably makes the history field reuse
        # the `item_id` embedding table -- confirm against the Field definition.
        Field('his_item_id', vocabulary_size=20, emb='item_id', length=20, belong='history'),
        Field('context_id', vocabulary_size=20, belong='context'),
        # domain's fields
        Field('domain_id', vocabulary_size=num_domain, belong='domain'),
        Field('domain_impression', vocabulary_size=1, belong='domain', dtype="float32"),
    ]
    model = pepnet(
        fields, task_list, [64, 32],
        history_agg='attention', agg_kwargs={},
        # Alternative history aggregation:
        # history_agg='transformer', agg_kwargs={'num_layers': 1, 'd_model': 4, 'num_heads': 2, 'dff': 64}
    )
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line after the summary.
    model.summary()
    return model
def create_dataset():
    """Generate a seeded random toy dataset for the example model.

    Returns:
        A ``(data, labels)`` tuple. ``data`` maps each input field name to a
        NumPy array with 2000 rows (``his_item_id`` has 20 columns per row).
        ``labels`` maps the name of every task in ``task_list`` to an array of
        2000 random integers drawn from {0, 1}.
    """
    sample_count = 2000
    np.random.seed(2024)  # fixed seed so every run draws the same samples

    # All draws consume the same global NumPy random stream, so they are made
    # in a fixed order; reordering them would change the generated values.
    features = {}
    features['uid'] = np.random.randint(0, 100, [sample_count])
    features['item_id'] = np.random.randint(0, 20, [sample_count])
    features['his_item_id'] = np.random.randint(0, 20, [sample_count, 20])
    features['context_id'] = np.random.randint(0, 20, [sample_count])
    features['domain_id'] = np.random.randint(0, num_domain, [sample_count])
    features['domain_impression'] = np.random.random([sample_count])

    # One independent binary label vector per task, keyed by task name.
    targets = {}
    for task in task_list:
        targets[task.name] = np.random.randint(0, 2, [sample_count])
    return features, targets
if __name__ == '__main__':
    # Script entry point: build the model, generate the toy data, then train
    # all tasks with a single Adam optimizer and binary cross-entropy loss.
    model = create_model()
    data, labels = create_dataset()
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    model.fit(x=data, y=labels, batch_size=32, epochs=10)
Layers whose names start with "dnn" will use the Adam optimizer, and layers whose names start with "embedding" will use Adagrad. You must also provide a "default" optimizer for the legacy layers.
import tensorflow as tf
from examples.pepnet import create_model, create_dataset
def train(data, labels):
    """Train a fresh model with per-prefix optimizers and save a checkpoint.

    Args:
        data: Mapping from input field name to an array of samples.
        labels: Training targets passed to ``model.fit`` together with ``data``.

    Side effects:
        Saves a checkpoint under the prefix ``./pepnet-saved/model.ckpt`` and
        prints the model output for the first 10 samples followed by the
        variables of the ``'embedding'`` optimizer.
    """
    # Optimizer per layer-name prefix, plus the required 'default' entry.
    optimizer_by_prefix = {'dnn': 'adam', 'embedding': 'Adagrad', 'default': 'adam'}

    model = create_model()
    model.compile(
        optimizer=optimizer_by_prefix,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    model.fit(x=data, y=labels, batch_size=32, epochs=10)

    # Checkpoint.save() appends a save counter to the prefix, so the first
    # save is written as "model.ckpt-1".
    saver = tf.train.Checkpoint(model=model)
    saver.save('./pepnet-saved/model.ckpt')

    # Print reference outputs and optimizer state for comparison after a restore.
    head = {name: values[:10] for name, values in data.items()}
    print(model(head))
    print(model.optimizer['embedding'].variables())
def restore(data):
    """Rebuild the model, restore the saved checkpoint and print its state.

    Args:
        data: Mapping from input field name to an array of samples; only the
            first 10 rows of each array are fed to the restored model.

    Side effects:
        Reads the checkpoint ``./pepnet-saved/model.ckpt-1`` and prints the
        model output for the first 10 samples followed by the variables of the
        ``'embedding'`` optimizer.
    """
    optimizer_by_prefix = {'dnn': 'adam', 'embedding': 'Adagrad', 'default': 'adam'}

    model = create_model()
    model.compile(
        optimizer=optimizer_by_prefix,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )

    loader = tf.train.Checkpoint(model=model)
    loader.restore('./pepnet-saved/model.ckpt-1')

    head = {name: values[:10] for name, values in data.items()}
    print(model(head))

    # NOTE(review): each optimizer is built explicitly on its own variable
    # group, presumably so that its state variables exist (and can pick up the
    # restored values) without running a training step -- confirm.
    for group in model.optimizer:
        group_variables = model.special_layer_variables[group]
        model.optimizer[group].build(group_variables)
    print(model.optimizer['embedding'].variables())
if __name__ == '__main__':
    # Round trip: train and checkpoint a model, then restore it into a fresh
    # model so the two sets of printed outputs can be compared.
    data, labels = create_dataset()
    train(data=data, labels=labels)
    restore(data=data)