Add pylint and codespell to linters
benfred committed Nov 26, 2021
1 parent 029f11d commit 5c05bf5
Showing 26 changed files with 160 additions and 91 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/build.yml
@@ -3,11 +3,7 @@
 
 name: Build
 
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
+on: [push, pull_request]
 
 jobs:
   build:
@@ -27,7 +23,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 isort cpplint black pytest
+        pip install flake8 isort cpplint black pytest codespell h5py pylint
         pip install -r requirements.txt
     - name: Lint with flake8
       run: |
@@ -42,6 +38,12 @@
     - name: Lint with isort
       run: |
         isort -c .
+    - name: Lint with codespell
+      run: |
+        codespell
+    - name: Lint with pylint
+      run: |
+        pylint implicit
     - name: Build
       run: |
         python setup.py develop
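The two new lint steps can be reproduced outside CI with the same commands the workflow runs — a sketch, assuming the repository root as the working directory and the dependencies from the pip install line above:

```
# install the linters added to the CI dependency list
pip install codespell pylint h5py
# spell-check the tree, then lint the implicit package
codespell
pylint implicit
```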
22 changes: 22 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,22 @@
+repos:
+  - repo: https://github.com/timothycrosley/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        additional_dependencies: [toml]
+  - repo: https://github.com/python/black
+    rev: 21.11b1
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+  - repo: https://github.com/pycqa/pylint
+    rev: v2.12.1
+    hooks:
+      - id: pylint
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
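These hooks mirror the CI linters so problems are caught at commit time. The file is consumed by the pre-commit tool, which is not in the workflow's pip install list, so installing it is assumed here; a typical setup:

```
pip install pre-commit
# register the git hook so the linters run on every commit
pre-commit install
# or run every hook against the whole repository once
pre-commit run --all-files
```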
46 changes: 46 additions & 0 deletions .pylintrc
@@ -0,0 +1,46 @@
+[MASTER]
+
+extension-pkg-whitelist=implicit.cpu._als,implicit._nearest_neighbours,implicit.gpu._cuda,implicit.cpu.bpr,implicit.cpu.topk,numpy.random.mtrand
+
+[MESSAGES CONTROL]
+disable=fixme,
+        missing-function-docstring,
+        missing-module-docstring,
+        missing-class-docstring,
+        wrong-import-order,
+        wrong-import-position,
+        ungrouped-imports,
+        line-too-long,
+        superfluous-parens,
+        trailing-whitespace,
+        invalid-name,
+        import-error,
+        no-self-use,
+
+        # disable code-complexity check
+        too-many-function-args,
+        too-many-instance-attributes,
+        too-many-locals,
+        too-many-branches,
+        too-many-nested-blocks,
+        too-many-statements,
+        too-many-arguments,
+        too-many-return-statements,
+        too-many-lines,
+        too-few-public-methods,
+
+        # TODO: fix underlying errors for these
+        import-outside-toplevel,
+        not-callable,
+        unused-argument,
+        abstract-method,
+        arguments-differ,
+        no-member,
+        no-name-in-module,
+        arguments-renamed,
+        import-self,
+
+[SIMILARITIES]
+min-similarity-lines=16
+ignore-docstrings=yes
+ignore-imports=yes
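The extension-pkg-whitelist line is what makes pylint usable on this codebase: the listed modules are compiled Cython/CUDA extensions that pylint cannot statically inspect, so whitelisting lets it import them to discover their members rather than reporting spurious errors. A quick local check against the new config — a sketch; pylint also picks .pylintrc up automatically from the working directory:

```
# point pylint at the new config explicitly
pylint --rcfile=.pylintrc implicit
```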
2 changes: 1 addition & 1 deletion README.md
@@ -93,7 +93,7 @@ which can be installed with homebrew: ```brew install gcc```. Running on Windows
 3.5+.
 
 GPU Support requires at least version 11 of the [NVidia CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). The build will use the ```nvcc``` compiler
-that is found on the path, but this can be overriden by setting the CUDAHOME enviroment variable
+that is found on the path, but this can be overridden by setting the CUDAHOME environment variable
 to point to your cuda installation.
 
 This library has been tested with Python 3.6, 3.7, 3.8 and 3.9 on Ubuntu, OSX and Windows.
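A from-source build against a specific CUDA install might then look like the following sketch, with a hypothetical toolkit path:

```
# hypothetical CUDA location; adjust to your installation
export CUDAHOME=/usr/local/cuda-11.4
python setup.py develop
```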
2 changes: 1 addition & 1 deletion benchmarks/README.md
@@ -66,4 +66,4 @@ Note that this dataset was filtered down for all versions to reviews that were p
 stars), to simulate a truly implicit dataset.
 
 Implicit on the CPU seems to suffer a bit here relative to the other options. It seems like there might
-be a single threaded bottleneck at some point thats worth examining later.
+be a single threaded bottleneck at some point that's worth examining later.
23 changes: 11 additions & 12 deletions implicit/als.py
@@ -61,15 +61,14 @@ def AlternatingLeastSquares(
             calculate_training_loss=calculate_training_loss,
             random_state=random_state,
         )
-    else:
-        return implicit.cpu.als.AlternatingLeastSquares(
-            factors,
-            regularization,
-            dtype,
-            use_native,
-            use_cg,
-            iterations,
-            calculate_training_loss,
-            num_threads,
-            random_state,
-        )
+    return implicit.cpu.als.AlternatingLeastSquares(
+        factors,
+        regularization,
+        dtype,
+        use_native,
+        use_cg,
+        iterations,
+        calculate_training_loss,
+        num_threads,
+        random_state,
+    )
43 changes: 21 additions & 22 deletions implicit/approximate_als.py
@@ -69,13 +69,13 @@ class NMSLibAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         method="hnsw",
         index_params=None,
         query_params=None,
         random_state=None,
-        *args,
         **kwargs
     ):
         if index_params is None:
@@ -93,9 +93,9 @@ def __init__(
         self.index_params = index_params
         self.query_params = query_params
 
-        super(NMSLibAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        self.max_norm = None
+
+        super().__init__(*args, random_state=random_state, **kwargs)
 
     def fit(self, Cui, show_progress=True):
         # nmslib can be a little chatty when first imported, disable some of
@@ -104,7 +104,7 @@ def fit(self, Cui, show_progress=True):
         import nmslib
 
         # train the model
-        super(NMSLibAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         # create index for similar_items
         if self.approximate_similar_items:
@@ -137,7 +137,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items:
-            return super(NMSLibAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         neighbours, distances = self.similar_items_index.knnQuery(self.item_factors[itemid], N)
         return zip(neighbours, 1.0 - distances)
@@ -152,7 +152,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(NMSLibAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -216,21 +216,20 @@ class AnnoyAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         n_trees=50,
         search_k=-1,
         random_state=None,
-        *args,
         **kwargs
     ):
 
-        super(AnnoyAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        super().__init__(*args, random_state=random_state, **kwargs)
 
         self.similar_items_index = None
         self.recommend_index = None
+        self.max_norm = None
 
         self.approximate_similar_items = approximate_similar_items
         self.approximate_recommend = approximate_recommend
@@ -243,7 +242,7 @@ def fit(self, Cui, show_progress=True):
         import annoy
 
         # train the model
-        super(AnnoyAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         # build up an Annoy Index with all the item_factors (for calculating
         # similar items)
@@ -267,7 +266,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items:
-            return super(AnnoyAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         neighbours, dist = self.similar_items_index.get_nns_by_item(
             itemid, N, search_k=self.search_k, include_distances=True
@@ -285,7 +284,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(AnnoyAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -353,18 +352,20 @@ class FaissAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         nlist=400,
         nprobe=20,
         use_gpu=implicit.gpu.HAS_CUDA,
         random_state=None,
-        *args,
         **kwargs
     ):
 
         self.similar_items_index = None
         self.recommend_index = None
+        self.quantizer = None
+        self.gpu_resources = None
 
         self.approximate_similar_items = approximate_similar_items
         self.approximate_recommend = approximate_recommend
@@ -373,15 +374,13 @@ def __init__(
         self.nlist = nlist
         self.nprobe = nprobe
         self.use_gpu = use_gpu
-        super(FaissAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        super().__init__(*args, random_state=random_state, **kwargs)
 
     def fit(self, Cui, show_progress=True):
         import faiss
 
         # train the model
-        super(FaissAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         self.quantizer = faiss.IndexFlat(self.factors)
 
@@ -433,7 +432,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items or (self.use_gpu and N >= 1024):
-            return super(FaissAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         factors = self.item_factors[itemid]
         factors /= numpy.linalg.norm(factors)
@@ -452,7 +451,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(FaissAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -474,7 +473,7 @@ def recommend(
         # the GPU variant of faiss doesn't support returning more than 1024 results.
         # fall back to the exact match when this happens
         if self.use_gpu and count >= 1024:
-            return super(FaissAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
21 changes: 10 additions & 11 deletions implicit/bpr.py
@@ -60,14 +60,13 @@ def BayesianPersonalizedRanking(
             verify_negative_samples=verify_negative_samples,
             random_state=random_state,
         )
-    else:
-        return implicit.cpu.bpr.BayesianPersonalizedRanking(
-            factors,
-            learning_rate,
-            regularization,
-            dtype=dtype,
-            num_threads=num_threads,
-            iterations=iterations,
-            verify_negative_samples=verify_negative_samples,
-            random_state=random_state,
-        )
+    return implicit.cpu.bpr.BayesianPersonalizedRanking(
+        factors,
+        learning_rate,
+        regularization,
+        dtype=dtype,
+        num_threads=num_threads,
+        iterations=iterations,
+        verify_negative_samples=verify_negative_samples,
+        random_state=random_state,
+    )
10 changes: 5 additions & 5 deletions implicit/cpu/als.py
@@ -69,7 +69,7 @@ def __init__(
         random_state=None,
     ):
 
-        super(AlternatingLeastSquares, self).__init__()
+        super().__init__()
 
         # parameters on how to factorize
         self.factors = factors
@@ -260,15 +260,15 @@ def explain(self, userid, user_items, itemid, user_weights=None, N=10):
         total_score = 0.0
         h = []
         h_len = 0
-        for itemid, confidence in nonzeros(user_items, userid):
+        for other_itemid, confidence in nonzeros(user_items, userid):
             if confidence < 0:
                 continue
 
-            factor = self.item_factors[itemid]
+            factor = self.item_factors[other_itemid]
             # s_u^ij = (y_i^t W^u) y_j
             score = weighted_item.dot(factor) * confidence
             total_score += score
-            contribution = (score, itemid)
+            contribution = (score, other_itemid)
             if h_len < N:
                 heapq.heappush(h, contribution)
                 h_len += 1
@@ -384,7 +384,7 @@ def least_squares_cg(Cui, X, Y, regularization, num_threads=0, cg_steps=3):
             if rsold < 1e-20:
                 continue
 
-            for it in range(cg_steps):
+            for _ in range(cg_steps):
                 # calculate Ap = YtCuYp - without actually calculating YtCuY
                 Ap = YtY.dot(p)
                 for i, confidence in nonzeros(Cui, u):
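For context, the comment in this hunk refers to the usual conjugate-gradient trick for implicit-feedback ALS: the confidence matrix C_u equals the identity except at the items user u has interacted with, so the product can be split as

```
Y^T C_u Y p = Y^T Y p + \sum_{i : c_{ui} \neq 0} (c_{ui} - 1) (y_i^T p) y_i
```

The first term reuses the precomputed YtY, and the loop over nonzeros(Cui, u) accumulates the sparse correction.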
2 changes: 1 addition & 1 deletion implicit/cpu/bpr.pyx
@@ -176,7 +176,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
 
         # we accept num_threads = 0 as indicating to create as many threads as we have cores,
         # but in that case we need the number of cores, since we need to initialize RNG state per
-        # thread. Get the appropiate value back from openmp
+        # thread. Get the appropriate value back from openmp
         cdef int num_threads = self.num_threads
         if not num_threads:
             num_threads = multiprocessing.cpu_count()