Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix save/load for models that haven't been fit #577

Merged
merged 3 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 12 additions & 18 deletions implicit/cpu/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,16 @@ def to_gpu(self):
regularization=self.regularization,
iterations=self.iterations,
calculate_training_loss=self.calculate_training_loss,
random_state=self.random_state,
)
ret.user_factors = implicit.gpu.Matrix(self.user_factors)
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
if self.user_factors is not None:
ret.user_factors = implicit.gpu.Matrix(self.user_factors)
if self.item_factors is not None:
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
return ret

def save(self, file):
np.savez(
file,
def save(self, fileobj_or_path):
args = dict(
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
Expand All @@ -429,20 +431,12 @@ def save(self, file):
cg_steps=self.cg_steps,
calculate_training_loss=self.calculate_training_loss,
dtype=self.dtype.name,
random_state=self.random_state,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret
# filter out None-valued args: np.savez would store them as object arrays,
# which np.load can't read back without allow_pickle=True
args = {k: v for k, v in args.items() if v is not None}
np.savez(fileobj_or_path, **args)


def least_squares(Cui, X, Y, regularization, num_threads=0):
Expand Down
31 changes: 13 additions & 18 deletions implicit/cpu/bpr.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,17 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
regularization=self.regularization,
iterations=self.iterations,
verify_negative_samples=self.verify_negative_samples,
random_state=self.random_state,
)
ret.user_factors = implicit.gpu.Matrix(self.user_factors)
ret.item_factors = implicit.gpu.Matrix(self.item_factors)

if self.user_factors is not None:
ret.user_factors = implicit.gpu.Matrix(self.user_factors)
if self.item_factors is not None:
ret.item_factors = implicit.gpu.Matrix(self.item_factors)
return ret

def save(self, file):
np.savez(
file,
user_factors=self.user_factors,
def save(self, fileobj_or_path):
args = dict(user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
factors=self.factors,
Expand All @@ -230,20 +232,13 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
num_threads=self.num_threads,
iterations=self.iterations,
dtype=self.dtype.name,
random_state=self.random_state
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret
# filter out None-valued args: np.savez would store them as object arrays,
# which np.load can't read back without allow_pickle=True
args = {k:v for k,v in args.items() if v is not None}
np.savez(fileobj_or_path, **args)


@cython.cdivision(True)
Expand Down
25 changes: 8 additions & 17 deletions implicit/cpu/lmf.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,8 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):

self._check_fit_errors()

def save(self, file):
np.savez(
file,
def save(self, fileobj_or_path):
args = dict(
user_factors=self.user_factors,
item_factors=self.item_factors,
regularization=self.regularization,
Expand All @@ -208,20 +207,12 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
num_threads=self.num_threads,
iterations=self.iterations,
dtype=self.dtype.name,
)

@classmethod
def load(cls, file):
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"
with np.load(file, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
ret.dtype = np.dtype(str(v))
else:
setattr(ret, k, v)
return ret
random_state=self.random_state)

# filter out None-valued args: np.savez would store them as object arrays,
# which np.load can't read back without allow_pickle=True
args = {k:v for k,v in args.items() if v is not None}
np.savez(fileobj_or_path, **args)


@cython.cdivision(True)
Expand Down
1 change: 1 addition & 0 deletions implicit/gpu/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ def to_cpu(self) -> implicit.cpu.als.AlternatingLeastSquares:
regularization=self.regularization,
iterations=self.iterations,
calculate_training_loss=self.calculate_training_loss,
random_state=self.random_state,
)
ret.user_factors = self.user_factors.to_numpy() if self.user_factors is not None else None
ret.item_factors = self.item_factors.to_numpy() if self.item_factors is not None else None
Expand Down
1 change: 1 addition & 0 deletions implicit/gpu/bpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def to_cpu(self) -> implicit.cpu.bpr.BayesianPersonalizedRanking:
regularization=self.regularization,
iterations=self.iterations,
verify_negative_samples=self.verify_negative_samples,
random_state=self.random_state,
)
ret.user_factors = self.user_factors.to_numpy() if self.user_factors is not None else None
ret.item_factors = self.item_factors.to_numpy() if self.item_factors is not None else None
Expand Down
27 changes: 15 additions & 12 deletions implicit/nearest_neighbours.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,24 +152,27 @@ def __setstate__(self, state):
else:
self.scorer = None

def save(self, file):
def save(self, fileobj_or_path):
args = dict(K=self.K)
m = self.similarity
np.savez(file, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, K=self.K)
if m is not None:
args.update(dict(shape=m.shape, data=m.data, indptr=m.indptr, indices=m.indices))
np.savez(fileobj_or_path, **args)

@classmethod
def load(cls, file):
def load(cls, fileobj_or_path):
# numpy.save automatically appends an ".npz" suffix, but numpy.load apparently doesn't
if isinstance(file, str) and not file.endswith(".npz"):
file = file + ".npz"

with np.load(file, allow_pickle=False) as data:
similarity = csr_matrix(
(data["data"], data["indices"], data["indptr"]), shape=data["shape"]
)
if isinstance(fileobj_or_path, str) and not fileobj_or_path.endswith(".npz"):
fileobj_or_path = fileobj_or_path + ".npz"

with np.load(fileobj_or_path, allow_pickle=False) as data:
ret = cls()
ret.similarity = similarity
ret.scorer = NearestNeighboursScorer(similarity)
if data.get("data") is not None:
similarity = csr_matrix(
(data["data"], data["indices"], data["indptr"]), shape=data["shape"]
)
ret.similarity = similarity
ret.scorer = NearestNeighboursScorer(similarity)
ret.K = data["K"]
return ret

Expand Down
18 changes: 15 additions & 3 deletions implicit/recommender_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import warnings
from abc import ABCMeta, abstractmethod

import numpy as np


class ModelFitError(Exception):
pass
Expand Down Expand Up @@ -166,13 +168,12 @@ def save(self, file):
"""

@classmethod
@abstractmethod
def load(cls, file) -> "RecommenderBase":
def load(cls, fileobj_or_path) -> "RecommenderBase":
"""Loads the model from a file

Parameters
----------
file : str or io.IOBase
fileobj_or_path : str or io.IOBase
Either the filename or an open file-like object to load the model from

Returns
Expand All @@ -185,6 +186,17 @@ def load(cls, file) -> "RecommenderBase":
save
numpy.load
"""
if isinstance(fileobj_or_path, str) and not fileobj_or_path.endswith(".npz"):
fileobj_or_path = fileobj_or_path + ".npz"
with np.load(fileobj_or_path, allow_pickle=False) as data:
ret = cls()
for k, v in data.items():
if k == "dtype":
v = np.dtype(str(v))
elif v.shape == ():
v = v.item()
setattr(ret, k, v)
return ret

def rank_items(self, userid, user_items, selected_items, recalculate_user=False):
warnings.warn(
Expand Down
18 changes: 18 additions & 0 deletions tests/approximate_als_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def test_pickle(self):
def test_serialization(self):
pass

def test_serialization_without_fit(self):
pass

if HAS_CUDA:

class AnnoyALSGPUTest(unittest.TestCase, RecommenderBaseTestMixin):
Expand All @@ -41,6 +44,9 @@ def test_pickle(self):
def test_serialization(self):
pass

def test_serialization_without_fit(self):
pass

except ImportError:
pass

Expand All @@ -64,6 +70,9 @@ def test_pickle(self):
def test_serialization(self):
pass

def test_serialization_without_fit(self):
pass

if HAS_CUDA:
# nmslib doesn't support querying on the gpu, but we should be able to still use a GPU als
# model with the nmslib index
Expand All @@ -84,6 +93,9 @@ def test_pickle(self):
def test_serialization(self):
pass

def test_serialization_without_fit(self):
pass

except ImportError:
pass

Expand All @@ -103,6 +115,9 @@ def test_pickle(self):
def test_serialization(self):
pass

def test_serialization_without_fit(self):
pass

if HAS_CUDA:

class FaissALSGPUTest(unittest.TestCase, RecommenderBaseTestMixin):
Expand Down Expand Up @@ -138,6 +153,9 @@ def test_pickle(self):
def test_serialization(self):
pass

def test_serialization_without_fit(self):
pass

except ImportError:
pass

Expand Down
8 changes: 8 additions & 0 deletions tests/recommender_base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,11 @@ def test_serialization(self):
reloaded = model.load(f)
assert_array_equal(model.similar_items(1)[0], reloaded.similar_items(1)[0])
assert_array_equal(model.similar_items(1)[1], reloaded.similar_items(1)[1])

def test_serialization_without_fit(self):
    """Round-trip an *unfitted* model through save/load.

    Saving a model whose factors are still None used to fail; this
    verifies the full attribute dict survives the round trip intact.
    """
    unfitted = self._get_model()
    with tempfile.TemporaryDirectory() as tmpdir:
        target = os.path.join(tmpdir, "model.npz")
        unfitted.save(target)
        restored = unfitted.load(target)
        assert unfitted.__dict__ == restored.__dict__