Add save/load functions for models #552

Merged: 5 commits, Mar 26, 2022
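For context, a minimal sketch of how the save/load API added by this PR is intended to be used. The file name and the synthetic interaction matrix are illustrative and not taken from the PR; it assumes the 0.5-style `fit(user_items)` signature shown later in this diff.

```python
import numpy as np
import scipy.sparse as sp

from implicit.cpu.als import AlternatingLeastSquares

# A tiny random user/item interaction matrix, just so there is something to fit on.
user_items = sp.random(50, 100, density=0.1, format="csr", dtype=np.float32)

model = AlternatingLeastSquares(factors=16, iterations=5)
model.fit(user_items)

# Persist the learned factors and hyperparameters to a numpy .npz archive.
model.save("als_model")  # numpy appends the .npz suffix automatically

# Later: restore an equivalent model without retraining.
restored = AlternatingLeastSquares.load("als_model")
assert np.allclose(model.user_factors, restored.user_factors)
```

Because the archive is plain `.npz`, nothing is unpickled on load (`load` passes `allow_pickle=False` to `numpy.load`).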
19 changes: 18 additions & 1 deletion docs/source/conf.py
@@ -30,7 +30,14 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_rtd_theme", "nbsphinx"]
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx_rtd_theme",
"nbsphinx",
"sphinx.ext.intersphinx",
"sphinx.ext.githubpages",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -112,3 +119,13 @@
"donate.html",
]
}

intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"numpy": ("https://numpy.org/doc/stable/", None),
}

autodoc_default_options = {
"members": True,
"member-order": "bysource",
}
2 changes: 1 addition & 1 deletion implicit/als.py
@@ -49,7 +49,7 @@ def AlternatingLeastSquares(
The number of threads to use for fitting the model. This only
applies for the native extensions. Specifying 0 means to default
to the number of cores on the machine.
random_state : int, RandomState or None, optional
random_state : int, np.random.RandomState or None, optional
The random state for seeding the initial item and user factors.
Default is None.
"""
7 changes: 7 additions & 0 deletions implicit/ann/annoy.py
@@ -236,3 +236,10 @@ def similar_users(self, userid, N=10, filter_users=None, users=None):
"similar_users isn't implemented with Annoy yet. (note: you can call "
" self.model.similar_models to get the same functionality on the inner model class)"
)

def save(self, file):
raise NotImplementedError(".save isn't implemented for Annoy yet")

@classmethod
def load(cls, file):
raise NotImplementedError(".load isn't implemented for Annoy yet")
7 changes: 7 additions & 0 deletions implicit/ann/faiss.py
@@ -280,3 +280,10 @@ def similar_users(self, userid, N=10, filter_users=None, users=None):
"similar_users isn't implemented with Faiss yet. (note: you can call "
" self.model.similar_models to get the same functionality on the inner model class)"
)

def save(self, file):
raise NotImplementedError(".save isn't implemented for Faiss yet")

@classmethod
def load(cls, file):
raise NotImplementedError(".load isn't implemented for Faiss yet")
7 changes: 7 additions & 0 deletions implicit/ann/nmslib.py
@@ -239,3 +239,10 @@ def similar_users(self, userid, N=10, filter_users=None, users=None):
"similar_users isn't implemented with NMSLib yet. (note: you can call "
" self.model.similar_models to get the same functionality on the inner model class)"
)

def save(self, file):
raise NotImplementedError(".save isn't implemented for NMSLib yet")

@classmethod
def load(cls, file):
raise NotImplementedError(".load isn't implemented for NMSLib yet")
33 changes: 31 additions & 2 deletions implicit/cpu/als.py
@@ -44,7 +44,7 @@ class AlternatingLeastSquares(MatrixFactorizationBase):
The number of threads to use for fitting the model. This only
applies for the native extensions. Specifying 0 means to default
to the number of cores on the machine.
random_state : int, RandomState or None, optional
random_state : int, numpy.random.RandomState or None, optional
The random state for seeding the initial item and user factors.
Default is None.

@@ -76,7 +76,7 @@ def __init__(
self.regularization = regularization

# options on how to fit the model
self.dtype = dtype
self.dtype = np.dtype(dtype)
self.use_native = use_native
self.use_cg = use_cg
self.iterations = iterations
@@ -424,6 +424,35 @@ def to_gpu(self):
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
return ret

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
num_threads=self.num_threads,
iterations=self.iterations,
use_native=self.use_native,
use_cg=self.use_cg,
cg_steps=self.cg_steps,
calculate_training_loss=self.calculate_training_loss,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret


def least_squares(Cui, X, Y, regularization, num_threads=0):
"""For each user in Cui, calculate factors Xu for them
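Since `save` above is a thin wrapper around `numpy.savez`, the resulting archive can also be inspected directly with numpy. A small sketch, assuming a model was previously saved to `als_model.npz` as in the example near the top of this page:

```python
import numpy as np

# Hyperparameters and factor matrices are stored under the same keys
# that AlternatingLeastSquares.save passes to np.savez above.
with np.load("als_model.npz", allow_pickle=False) as data:
    print(sorted(data.files))  # calculate_training_loss, dtype, factors, item_factors, ...
    print(data["user_factors"].shape, data["dtype"])
```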
31 changes: 29 additions & 2 deletions implicit/cpu/bpr.pyx
@@ -117,7 +117,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
self.learning_rate = learning_rate
self.iterations = iterations
self.regularization = regularization
self.dtype = dtype
self.dtype = np.dtype(dtype)
self.num_threads = num_threads
self.verify_negative_samples = verify_negative_samples
self.random_state = random_state
@@ -204,7 +204,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):

self._check_fit_errors()

def to_gpu(self):
def to_gpu(self) -> "implicit.gpu.bpr.BayesianPersonalizedRanking":
"""Converts this model to an equivalent version running on the gpu"""
import implicit.gpu.bpr

@@ -219,6 +219,33 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
return ret

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
learning_rate=self.learning_rate,
verify_negative_samples=self.verify_negative_samples,
num_threads=self.num_threads,
iterations=self.iterations,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret


@cython.cdivision(True)
@cython.boundscheck(False)
29 changes: 28 additions & 1 deletion implicit/cpu/lmf.pyx
@@ -107,7 +107,7 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
self.learning_rate = learning_rate
self.iterations = iterations
self.regularization = regularization
self.dtype = dtype
self.dtype = np.dtype(dtype)
self.num_threads = num_threads
self.neg_prop = neg_prop
self.random_state = random_state
@@ -198,6 +198,33 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):

self._check_fit_errors()

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
learning_rate=self.learning_rate,
neg_prop=self.neg_prop,
num_threads=self.num_threads,
iterations=self.iterations,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret


@cython.cdivision(True)
cdef inline floating sigmoid(floating x) nogil:
6 changes: 3 additions & 3 deletions implicit/gpu/als.py
@@ -269,14 +269,14 @@ def XtX(self):
self.solver.calculate_yty(self.user_factors, self._XtX, self.regularization)
return self._XtX

def to_cpu(self):
def to_cpu(self) -> implicit.cpu.als.AlternatingLeastSquares:
"""Converts this model to an equivalent version running on the CPU"""
ret = implicit.cpu.als.AlternatingLeastSquares(
factors=self.factors,
regularization=self.regularization,
iterations=self.iterations,
calculate_training_loss=self.calculate_training_loss,
)
ret.user_factors = self.user_factors.to_numpy()
ret.item_factors = self.item_factors.to_numpy()
ret.user_factors = self.user_factors.to_numpy() if self.user_factors is not None else None
ret.item_factors = self.item_factors.to_numpy() if self.item_factors is not None else None
return ret
6 changes: 3 additions & 3 deletions implicit/gpu/bpr.py
@@ -150,7 +150,7 @@ def fit(self, user_items, show_progress=True):
}
)

def to_cpu(self):
def to_cpu(self) -> implicit.cpu.bpr.BayesianPersonalizedRanking:
"""Converts this model to an equivalent version running on the cpu"""
ret = implicit.cpu.bpr.BayesianPersonalizedRanking(
factors=self.factors,
@@ -159,6 +159,6 @@ def to_cpu(self):
iterations=self.iterations,
verify_negative_samples=self.verify_negative_samples,
)
ret.user_factors = self.user_factors.to_numpy()
ret.item_factors = self.item_factors.to_numpy()
ret.user_factors = self.user_factors.to_numpy() if self.user_factors is not None else None
ret.item_factors = self.item_factors.to_numpy() if self.item_factors is not None else None
return ret
7 changes: 7 additions & 0 deletions implicit/gpu/matrix_factorization_base.py
@@ -207,6 +207,13 @@ def recalculate_user(self, userid, user_items):
def recalculate_item(self, itemid, item_users):
raise NotImplementedError("recalculate_item is not supported with this model")

@classmethod
def load(cls, file):
return cls().to_cpu().load(file).to_gpu()

def save(self, file):
self.to_cpu().save(file)

def __getstate__(self):
return {
"item_factors": self.item_factors.to_numpy() if self.item_factors else None,
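The GPU base class reuses the CPU serialization path by converting between device and host representations. Roughly, the two methods above amount to the following sketch; it assumes a CUDA-enabled build of implicit and a `user_items` sparse matrix like the one in the earlier examples.

```python
from implicit.gpu.als import AlternatingLeastSquares as GPUAlternatingLeastSquares

gpu_model = GPUAlternatingLeastSquares(factors=16)
gpu_model.fit(user_items)  # user_items: scipy.sparse CSR matrix, assumed defined

# save copies the factors back to host memory and reuses the CPU np.savez path ...
gpu_model.save("als_gpu")  # equivalent to gpu_model.to_cpu().save("als_gpu")

# ... and load goes the other way: load into a CPU model, then move back to the GPU.
restored = GPUAlternatingLeastSquares.load("als_gpu")  # cls().to_cpu().load(file).to_gpu()
```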
26 changes: 14 additions & 12 deletions implicit/nearest_neighbours.py
@@ -152,24 +152,26 @@ def __setstate__(self, state):
else:
self.scorer = None

def save(self, filename):
def save(self, file):
m = self.similarity
np.savez(filename, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, K=self.K)
np.savez(file, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, K=self.K)

@classmethod
def load(cls, filename):
def load(cls, file):
# numpy.savez automatically appends an .npz suffix, but numpy.load doesn't
if not filename.endswith(".npz"):
filename = filename + ".npz"
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"

m = np.load(filename)
similarity = csr_matrix((m["data"], m["indices"], m["indptr"]), shape=m["shape"])
with np.load(file, allow_pickle=False) as data:
similarity = csr_matrix(
(data["data"], data["indices"], data["indptr"]), shape=data["shape"]
)

ret = cls()
ret.similarity = similarity
ret.scorer = NearestNeighboursScorer(similarity)
ret.K = m["K"]
return ret
ret = cls()
ret.similarity = similarity
ret.scorer = NearestNeighboursScorer(similarity)
ret.K = data["K"]
return ret


class CosineRecommender(ItemItemRecommender):
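The nearest-neighbour models round-trip through the same `.npz` format, storing the item-item similarity matrix as its CSR components plus `K`. A hedged sketch using `CosineRecommender` and synthetic data (file name and matrix are illustrative):

```python
import numpy as np
import scipy.sparse as sp

from implicit.nearest_neighbours import CosineRecommender

user_items = sp.random(50, 100, density=0.1, format="csr", dtype=np.float32)

model = CosineRecommender(K=20)
model.fit(user_items)

# The similarity matrix is saved as data/indices/indptr/shape, alongside K.
model.save("cosine_model")

restored = CosineRecommender.load("cosine_model")
assert restored.K == model.K
assert np.allclose(restored.similarity.toarray(), model.similarity.toarray())
```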
40 changes: 37 additions & 3 deletions implicit/recommender_base.py
@@ -7,11 +7,9 @@ class ModelFitError(Exception):
pass


class RecommenderBase:
class RecommenderBase(metaclass=ABCMeta):
"""Defines a common interface for all recommendation models"""

__metaclass__ = ABCMeta

@abstractmethod
def fit(self, user_items, show_progress=True):
"""
@@ -152,6 +150,42 @@ def similar_items(
Tuple of (itemids, scores) arrays
"""

@abstractmethod
def save(self, file):
"""Saves the model to a file, using the numpy `.npz` format

Parameters
----------
file : str or io.IOBase
Either the filename or an open file-like object to save the model to

See Also
--------
load
numpy.savez
"""

@classmethod
@abstractmethod
def load(cls, file) -> "RecommenderBase":
"""Loads the model from a file

Parameters
----------
file : str or io.IOBase
Either the filename or an open file-like object to load the model from

Returns
-------
RecommenderBase
The model loaded up from disk

See Also
--------
save
numpy.load
"""

def rank_items(self, userid, user_items, selected_items, recalculate_user=False):
warnings.warn(
"rank_items is deprecated. Use recommend with the 'items' parameter instead",
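As the docstrings above note, `file` may be an open file-like object rather than a filename, which allows an in-memory round trip. A minimal sketch, reusing the ALS model and synthetic `user_items` matrix from the earlier examples:

```python
import io

import numpy as np
import scipy.sparse as sp

from implicit.cpu.als import AlternatingLeastSquares

user_items = sp.random(50, 100, density=0.1, format="csr", dtype=np.float32)
model = AlternatingLeastSquares(factors=16, iterations=5)
model.fit(user_items)

# Round-trip the model entirely in memory: np.savez writes into the buffer,
# and np.load reads it back after rewinding.
buffer = io.BytesIO()
model.save(buffer)
buffer.seek(0)
restored = AlternatingLeastSquares.load(buffer)
assert np.allclose(model.item_factors, restored.item_factors)
```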