Add save/load functions for models #552

Merged: 5 commits, Mar 26, 2022
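For context, a minimal sketch of how the save/load API added by this PR is intended to be used. The file name and the synthetic interaction matrix are illustrative and not taken from the PR; it assumes the 0.5-style `fit(user_items)` signature shown later in this diff.

```python
import numpy as np
import scipy.sparse as sp

from implicit.cpu.als import AlternatingLeastSquares

# A tiny random user/item interaction matrix, just so there is something to fit on.
user_items = sp.random(50, 100, density=0.1, format="csr", dtype=np.float32)

model = AlternatingLeastSquares(factors=16, iterations=5)
model.fit(user_items)

# Persist the learned factors and hyperparameters to a numpy .npz archive.
model.save("als_model")  # numpy appends the .npz suffix automatically

# Later: restore an equivalent model without retraining.
restored = AlternatingLeastSquares.load("als_model")
assert np.allclose(model.user_factors, restored.user_factors)
```

Because the archive is plain `.npz`, nothing is unpickled on load (`load` passes `allow_pickle=False` to `numpy.load`).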
19 changes: 18 additions & 1 deletion docs/source/conf.py
@@ -30,7 +30,14 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_rtd_theme", "nbsphinx"]
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx_rtd_theme",
"nbsphinx",
"sphinx.ext.intersphinx",
"sphinx.ext.githubpages",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -112,3 +119,13 @@
"donate.html",
]
}

intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"numpy": ("https://numpy.org/doc/stable/", None),
}

autodoc_default_options = {
"members": True,
"member-order": "bysource",
}
2 changes: 1 addition & 1 deletion implicit/als.py
@@ -49,7 +49,7 @@ def AlternatingLeastSquares(
The number of threads to use for fitting the model. This only
applies for the native extensions. Specifying 0 means to default
to the number of cores on the machine.
random_state : int, RandomState or None, optional
random_state : int, np.random.RandomState or None, optional
The random state for seeding the initial item and user factors.
Default is None.
"""
7 changes: 7 additions & 0 deletions implicit/ann/annoy.py
@@ -236,3 +236,10 @@ def similar_users(self, userid, N=10, filter_users=None, users=None):
"similar_users isn't implemented with Annoy yet. (note: you can call "
" self.model.similar_models to get the same functionality on the inner model class)"
)

def save(self, file):
raise NotImplementedError(".save isn't implemented for Annoy yet")

@classmethod
def load(cls, file):
raise NotImplementedError(".load isn't implemented for Annoy yet")
7 changes: 7 additions & 0 deletions implicit/ann/faiss.py
@@ -280,3 +280,10 @@ def similar_users(self, userid, N=10, filter_users=None, users=None):
"similar_users isn't implemented with Faiss yet. (note: you can call "
" self.model.similar_models to get the same functionality on the inner model class)"
)

def save(self, file):
raise NotImplementedError(".save isn't implemented for Faiss yet")

@classmethod
def load(cls, file):
raise NotImplementedError(".load isn't implemented for Faiss yet")
7 changes: 7 additions & 0 deletions implicit/ann/nmslib.py
@@ -239,3 +239,10 @@ def similar_users(self, userid, N=10, filter_users=None, users=None):
"similar_users isn't implemented with NMSLib yet. (note: you can call "
" self.model.similar_models to get the same functionality on the inner model class)"
)

def save(self, file):
raise NotImplementedError(".save isn't implemented for NMSLib yet")

@classmethod
def load(cls, file):
raise NotImplementedError(".load isn't implemented for NMSLib yet")
33 changes: 31 additions & 2 deletions implicit/cpu/als.py
@@ -44,7 +44,7 @@ class AlternatingLeastSquares(MatrixFactorizationBase):
The number of threads to use for fitting the model. This only
applies for the native extensions. Specifying 0 means to default
to the number of cores on the machine.
random_state : int, RandomState or None, optional
random_state : int, numpy.random.RandomState or None, optional
The random state for seeding the initial item and user factors.
Default is None.

@@ -76,7 +76,7 @@ def __init__(
self.regularization = regularization

# options on how to fit the model
self.dtype = dtype
self.dtype = np.dtype(dtype)
self.use_native = use_native
self.use_cg = use_cg
self.iterations = iterations
@@ -424,6 +424,35 @@ def to_gpu(self):
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
return ret

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
num_threads=self.num_threads,
iterations=self.iterations,
use_native=self.use_native,
use_cg=self.use_cg,
cg_steps=self.cg_steps,
calculate_training_loss=self.calculate_training_loss,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret


def least_squares(Cui, X, Y, regularization, num_threads=0):
"""For each user in Cui, calculate factors Xu for them
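Since `save` above is a thin wrapper around `numpy.savez`, the resulting archive can also be inspected directly with numpy. A small sketch, assuming a model was previously saved to `als_model.npz` as in the example near the top of this page:

```python
import numpy as np

# Hyperparameters and factor matrices are stored under the same keys
# that AlternatingLeastSquares.save passes to np.savez above.
with np.load("als_model.npz", allow_pickle=False) as data:
    print(sorted(data.files))  # calculate_training_loss, dtype, factors, item_factors, ...
    print(data["user_factors"].shape, data["dtype"])
```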
31 changes: 29 additions & 2 deletions implicit/cpu/bpr.pyx
@@ -117,7 +117,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
self.learning_rate = learning_rate
self.iterations = iterations
self.regularization = regularization
self.dtype = dtype
self.dtype = np.dtype(dtype)
self.num_threads = num_threads
self.verify_negative_samples = verify_negative_samples
self.random_state = random_state
@@ -204,7 +204,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):

self._check_fit_errors()

def to_gpu(self):
def to_gpu(self) -> "implicit.gpu.bpr.BayesianPersonalizedRanking":
"""Converts this model to an equivalent version running on the gpu"""
import implicit.gpu.bpr

@@ -219,6 +219,33 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
return ret

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
learning_rate=self.learning_rate,
verify_negative_samples=self.verify_negative_samples,
num_threads=self.num_threads,
iterations=self.iterations,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret


@cython.cdivision(True)
@cython.boundscheck(False)
29 changes: 28 additions & 1 deletion implicit/cpu/lmf.pyx
@@ -107,7 +107,7 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
self.learning_rate = learning_rate
self.iterations = iterations
self.regularization = regularization
self.dtype = dtype
self.dtype = np.dtype(dtype)
self.num_threads = num_threads
self.neg_prop = neg_prop
self.random_state = random_state
@@ -198,6 +198,33 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):

self._check_fit_errors()

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
learning_rate=self.learning_rate,
neg_prop=self.neg_prop,
num_threads=self.num_threads,
iterations=self.iterations,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret


@cython.cdivision(True)
cdef inline floating sigmoid(floating x) nogil:
6 changes: 3 additions & 3 deletions implicit/gpu/als.py
@@ -269,14 +269,14 @@ def XtX(self):
self.solver.calculate_yty(self.user_factors, self._XtX, self.regularization)
return self._XtX

def to_cpu(self):
def to_cpu(self) -> implicit.cpu.als.AlternatingLeastSquares:
"""Converts this model to an equivalent version running on the CPU"""
ret = implicit.cpu.als.AlternatingLeastSquares(
factors=self.factors,
regularization=self.regularization,
iterations=self.iterations,
calculate_training_loss=self.calculate_training_loss,
)
ret.user_factors = self.user_factors.to_numpy()
ret.item_factors = self.item_factors.to_numpy()
ret.user_factors = self.user_factors.to_numpy() if self.user_factors is not None else None
ret.item_factors = self.item_factors.to_numpy() if self.item_factors is not None else None
return ret
6 changes: 3 additions & 3 deletions implicit/gpu/bpr.py
@@ -150,7 +150,7 @@ def fit(self, user_items, show_progress=True):
}
)

def to_cpu(self):
def to_cpu(self) -> implicit.cpu.bpr.BayesianPersonalizedRanking:
"""Converts this model to an equivalent version running on the cpu"""
ret = implicit.cpu.bpr.BayesianPersonalizedRanking(
factors=self.factors,
@@ -159,6 +159,6 @@ def to_cpu(self):
iterations=self.iterations,
verify_negative_samples=self.verify_negative_samples,
)
ret.user_factors = self.user_factors.to_numpy()
ret.item_factors = self.item_factors.to_numpy()
ret.user_factors = self.user_factors.to_numpy() if self.user_factors is not None else None
ret.item_factors = self.item_factors.to_numpy() if self.item_factors is not None else None
return ret
7 changes: 7 additions & 0 deletions implicit/gpu/matrix_factorization_base.py
@@ -207,6 +207,13 @@ def recalculate_user(self, userid, user_items):
def recalculate_item(self, itemid, item_users):
raise NotImplementedError("recalculate_item is not supported with this model")

@classmethod
def load(cls, file):
return cls().to_cpu().load(file).to_gpu()

def save(self, file):
self.to_cpu().save(file)

def __getstate__(self):
return {
"item_factors": self.item_factors.to_numpy() if self.item_factors else None,
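The GPU base class reuses the CPU serialization path by converting between device and host representations. Roughly, the two methods above amount to the following sketch; it assumes a CUDA-enabled build of implicit and a `user_items` sparse matrix like the one in the earlier examples.

```python
from implicit.gpu.als import AlternatingLeastSquares as GPUAlternatingLeastSquares

gpu_model = GPUAlternatingLeastSquares(factors=16)
gpu_model.fit(user_items)  # user_items: scipy.sparse CSR matrix, assumed defined

# save copies the factors back to host memory and reuses the CPU np.savez path ...
gpu_model.save("als_gpu")  # equivalent to gpu_model.to_cpu().save("als_gpu")

# ... and load goes the other way: load into a CPU model, then move back to the GPU.
restored = GPUAlternatingLeastSquares.load("als_gpu")  # cls().to_cpu().load(file).to_gpu()
```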
26 changes: 14 additions & 12 deletions implicit/nearest_neighbours.py
@@ -152,24 +152,26 @@ def __setstate__(self, state):
else:
self.scorer = None

def save(self, filename):
def save(self, file):
m = self.similarity
np.savez(filename, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, K=self.K)
np.savez(file, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, K=self.K)

@classmethod
def load(cls, filename):
def load(cls, file):
# numpy.savez automatically appends an .npz suffix, but numpy.load doesn't
if not filename.endswith(".npz"):
filename = filename + ".npz"
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"

m = np.load(filename)
similarity = csr_matrix((m["data"], m["indices"], m["indptr"]), shape=m["shape"])
with np.load(file, allow_pickle=False) as data:
similarity = csr_matrix(
(data["data"], data["indices"], data["indptr"]), shape=data["shape"]
)

ret = cls()
ret.similarity = similarity
ret.scorer = NearestNeighboursScorer(similarity)
ret.K = m["K"]
return ret
ret = cls()
ret.similarity = similarity
ret.scorer = NearestNeighboursScorer(similarity)
ret.K = data["K"]
return ret


class CosineRecommender(ItemItemRecommender):
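The nearest-neighbour models round-trip through the same `.npz` format, storing the item-item similarity matrix as its CSR components plus `K`. A hedged sketch using `CosineRecommender` and synthetic data (file name and matrix are illustrative):

```python
import numpy as np
import scipy.sparse as sp

from implicit.nearest_neighbours import CosineRecommender

user_items = sp.random(50, 100, density=0.1, format="csr", dtype=np.float32)

model = CosineRecommender(K=20)
model.fit(user_items)

# The similarity matrix is saved as data/indices/indptr/shape, alongside K.
model.save("cosine_model")

restored = CosineRecommender.load("cosine_model")
assert restored.K == model.K
assert np.allclose(restored.similarity.toarray(), model.similarity.toarray())
```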
40 changes: 37 additions & 3 deletions implicit/recommender_base.py
@@ -7,11 +7,9 @@ class ModelFitError(Exception):
pass


class RecommenderBase:
class RecommenderBase(metaclass=ABCMeta):
"""Defines a common interface for all recommendation models"""

__metaclass__ = ABCMeta

@abstractmethod
def fit(self, user_items, show_progress=True):
"""
@@ -152,6 +150,42 @@ def similar_items(
Tuple of (itemids, scores) arrays
"""

@abstractmethod
def save(self, file):
"""Saves the model to a file, using the numpy `.npz` format

Parameters
----------
file : str or io.IOBase
Either the filename or an open file-like object to save the model to

See Also
--------
load
numpy.savez
"""

@classmethod
@abstractmethod
def load(cls, file) -> "RecommenderBase":
"""Loads the model from a file

Parameters
----------
file : str or io.IOBase
Either the filename or an open file-like object to load the model from

Returns
-------
RecommenderBase
The model loaded up from disk

See Also
--------
save
numpy.load
"""

def rank_items(self, userid, user_items, selected_items, recalculate_user=False):
warnings.warn(
"rank_items is deprecated. Use recommend with the 'items' parameter instead",
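As the docstrings above note, `file` may be an open file-like object rather than a filename, which allows an in-memory round trip. A minimal sketch, reusing the ALS model and synthetic `user_items` matrix from the earlier examples:

```python
import io

import numpy as np
import scipy.sparse as sp

from implicit.cpu.als import AlternatingLeastSquares

user_items = sp.random(50, 100, density=0.1, format="csr", dtype=np.float32)
model = AlternatingLeastSquares(factors=16, iterations=5)
model.fit(user_items)

# Round-trip the model entirely in memory: np.savez writes into the buffer,
# and np.load reads it back after rewinding.
buffer = io.BytesIO()
model.save(buffer)
buffer.seek(0)
restored = AlternatingLeastSquares.load(buffer)
assert np.allclose(model.item_factors, restored.item_factors)
```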