From 5c05bf5935b80229573621a73d224bcf4992021d Mon Sep 17 00:00:00 2001
From: Ben Frederickson
Date: Thu, 25 Nov 2021 21:27:42 -0800
Subject: [PATCH] Add pylint and codespell to linters

---
 .github/workflows/build.yml               | 14 ++++---
 .pre-commit-config.yaml                   | 22 +++++++++++
 .pylintrc                                 | 46 +++++++++++++++++++++++
 README.md                                 |  2 +-
 benchmarks/README.md                      |  2 +-
 implicit/als.py                           | 23 ++++++------
 implicit/approximate_als.py               | 43 +++++++++++----------
 implicit/bpr.py                           | 21 +++++------
 implicit/cpu/als.py                       | 10 ++---
 implicit/cpu/bpr.pyx                      |  2 +-
 implicit/cpu/matrix_factorization_base.py |  8 +---
 implicit/cpu/topk.pyx                     |  2 +-
 implicit/datasets/lastfm.py               |  2 +-
 implicit/datasets/million_song_dataset.py |  2 +-
 implicit/datasets/movielens.py            |  6 +--
 implicit/datasets/reddit.py               |  2 +-
 implicit/gpu/als.py                       |  2 +-
 implicit/gpu/bpr.py                       |  8 ++--
 implicit/gpu/matrix_factorization_base.py |  2 +
 implicit/gpu/utils.cuh                    |  2 +-
 implicit/lmf.pyx                          |  2 +-
 implicit/nearest_neighbours.py            |  2 +-
 implicit/recommender_base.py              |  5 +--
 implicit/utils.py                         |  2 +-
 setup.cfg                                 |  5 +++
 tests/recommender_base_test.py            | 14 +++----
 26 files changed, 160 insertions(+), 91 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 .pylintrc

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 12321e4b..caaf2876 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -3,11 +3,7 @@
 name: Build
 
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
+on: [push, pull_request]
 
 jobs:
   build:
@@ -27,7 +23,7 @@ jobs:
     - name: Install dependencies
      run: |
         python -m pip install --upgrade pip
-        pip install flake8 isort cpplint black pytest
+        pip install flake8 isort cpplint black pytest codespell h5py pylint
         pip install -r requirements.txt
     - name: Lint with flake8
       run: |
@@ -42,6 +38,12 @@ jobs:
     - name: Lint with isort
       run: |
         isort -c .
+    - name: Lint with codespell
+      run: |
+        codespell
+    - name: Lint with pylint
+      run: |
+        pylint implicit
     - name: Build
       run: |
         python setup.py develop
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..ccb0b34e
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,22 @@
+repos:
+  - repo: https://github.com/timothycrosley/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        additional_dependencies: [toml]
+  - repo: https://github.com/python/black
+    rev: 21.11b1
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+  - repo: https://github.com/pycqa/pylint
+    rev: v2.12.1
+    hooks:
+      - id: pylint
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 00000000..9a140043
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,46 @@
+[MASTER]
+
+extension-pkg-whitelist=implicit.cpu._als,implicit._nearest_neighbours,implicit.gpu._cuda,implicit.cpu.bpr,implicit.cpu.topk,numpy.random.mtrand
+
+[MESSAGES CONTROL]
+disable=fixme,
+        missing-function-docstring,
+        missing-module-docstring,
+        missing-class-docstring,
+        wrong-import-order,
+        wrong-import-position,
+        ungrouped-imports,
+        line-too-long,
+        superfluous-parens,
+        trailing-whitespace,
+        invalid-name,
+        import-error,
+        no-self-use,
+
+        # disable code-complexity check
+        too-many-function-args,
+        too-many-instance-attributes,
+        too-many-locals,
+        too-many-branches,
+        too-many-nested-blocks,
+        too-many-statements,
+        too-many-arguments,
+        too-many-return-statements,
+        too-many-lines,
+        too-few-public-methods,
+
+        # TODO: fix underlying errors for these
+        import-outside-toplevel,
+        not-callable,
+        unused-argument,
+        abstract-method,
+        arguments-differ,
+        no-member,
+        no-name-in-module,
+        arguments-renamed,
+        import-self,
+
+[SIMILARITIES]
+min-similarity-lines=16
+ignore-docstrings=yes
+ignore-imports=yes
diff --git a/README.md b/README.md
index a98a4600..172c7dc1 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ which can be installed with homebrew: ```brew install gcc```.
 Running on Windows requires Python 3.5+. GPU Support requires at least version 11 of the
 [NVidia CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). The build will use the ```nvcc``` compiler
-that is found on the path, but this can be overriden by setting the CUDAHOME enviroment variable
+that is found on the path, but this can be overridden by setting the CUDAHOME environment variable
 to point to your cuda installation.
 
 This library has been tested with Python 3.6, 3.7, 3.8 and 3.9 on Ubuntu, OSX and Windows.
diff --git a/benchmarks/README.md b/benchmarks/README.md
index e69394fb..346e0c81 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -66,4 +66,4 @@ Note that this dataset was filtered down for all versions to reviews that were p
 stars), to simulate a truly implicit dataset.
 
 Implicit on the CPU seems to suffer a bit here relative to the other options. It seems like there might
-be a single threaded bottleneck at some point thats worth examining later.
+be a single threaded bottleneck at some point that's worth examining later.
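
With the pre-commit configuration and .pylintrc added above, the same checks CI runs can
be reproduced locally before pushing. A minimal sketch, assuming the pre-commit package is
installed from PyPI:

    pip install pre-commit
    pre-commit install          # run the hooks automatically on every git commit
    pre-commit run --all-files  # or lint the whole tree in one pass
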
diff --git a/implicit/als.py b/implicit/als.py
index ced62558..dbabb93a 100644
--- a/implicit/als.py
+++ b/implicit/als.py
@@ -61,15 +61,14 @@ def AlternatingLeastSquares(
             calculate_training_loss=calculate_training_loss,
             random_state=random_state,
         )
-    else:
-        return implicit.cpu.als.AlternatingLeastSquares(
-            factors,
-            regularization,
-            dtype,
-            use_native,
-            use_cg,
-            iterations,
-            calculate_training_loss,
-            num_threads,
-            random_state,
-        )
+    return implicit.cpu.als.AlternatingLeastSquares(
+        factors,
+        regularization,
+        dtype,
+        use_native,
+        use_cg,
+        iterations,
+        calculate_training_loss,
+        num_threads,
+        random_state,
+    )
diff --git a/implicit/approximate_als.py b/implicit/approximate_als.py
index eba15807..e18c207e 100644
--- a/implicit/approximate_als.py
+++ b/implicit/approximate_als.py
@@ -69,13 +69,13 @@ class NMSLibAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         method="hnsw",
         index_params=None,
         query_params=None,
         random_state=None,
-        *args,
         **kwargs
     ):
         if index_params is None:
@@ -93,9 +93,9 @@ def __init__(
         self.index_params = index_params
         self.query_params = query_params
 
-        super(NMSLibAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        self.max_norm = None
+
+        super().__init__(*args, random_state=random_state, **kwargs)
 
     def fit(self, Cui, show_progress=True):
         # nmslib can be a little chatty when first imported, disable some of
@@ -104,7 +104,7 @@ def fit(self, Cui, show_progress=True):
         import nmslib
 
         # train the model
-        super(NMSLibAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         # create index for similar_items
         if self.approximate_similar_items:
@@ -137,7 +137,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items:
-            return super(NMSLibAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         neighbours, distances = self.similar_items_index.knnQuery(self.item_factors[itemid], N)
         return zip(neighbours, 1.0 - distances)
@@ -152,7 +152,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(NMSLibAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -216,21 +216,20 @@ class AnnoyAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         n_trees=50,
         search_k=-1,
         random_state=None,
-        *args,
         **kwargs
     ):
-        super(AnnoyAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        super().__init__(*args, random_state=random_state, **kwargs)
 
         self.similar_items_index = None
         self.recommend_index = None
+        self.max_norm = None
 
         self.approximate_similar_items = approximate_similar_items
         self.approximate_recommend = approximate_recommend
@@ -243,7 +242,7 @@ def fit(self, Cui, show_progress=True):
         import annoy
 
         # train the model
-        super(AnnoyAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         # build up an Annoy Index with all the item_factors (for calculating
         # similar items)
@@ -267,7 +266,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items:
-            return super(AnnoyAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         neighbours, dist = self.similar_items_index.get_nns_by_item(
            itemid, N, search_k=self.search_k, include_distances=True
@@ -285,7 +284,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(AnnoyAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -353,18 +352,20 @@ class FaissAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         nlist=400,
         nprobe=20,
         use_gpu=implicit.gpu.HAS_CUDA,
         random_state=None,
-        *args,
         **kwargs
     ):
         self.similar_items_index = None
         self.recommend_index = None
+        self.quantizer = None
+        self.gpu_resources = None
 
         self.approximate_similar_items = approximate_similar_items
         self.approximate_recommend = approximate_recommend
@@ -373,15 +374,13 @@ def __init__(
         self.nlist = nlist
         self.nprobe = nprobe
         self.use_gpu = use_gpu
-        super(FaissAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        super().__init__(*args, random_state=random_state, **kwargs)
 
     def fit(self, Cui, show_progress=True):
         import faiss
 
         # train the model
-        super(FaissAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         self.quantizer = faiss.IndexFlat(self.factors)
@@ -433,7 +432,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items or (self.use_gpu and N >= 1024):
-            return super(FaissAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         factors = self.item_factors[itemid]
         factors /= numpy.linalg.norm(factors)
@@ -452,7 +451,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(FaissAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -474,7 +473,7 @@
         # the GPU variant of faiss doesn't support returning more than 1024 results.
         # fall back to the exact match when this happens
         if self.use_gpu and count >= 1024:
-            return super(FaissAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
diff --git a/implicit/bpr.py b/implicit/bpr.py
index 80cc258c..342bee2f 100644
--- a/implicit/bpr.py
+++ b/implicit/bpr.py
@@ -60,14 +60,13 @@ def BayesianPersonalizedRanking(
         verify_negative_samples=verify_negative_samples,
         random_state=random_state,
     )
-    else:
-        return implicit.cpu.bpr.BayesianPersonalizedRanking(
-            factors,
-            learning_rate,
-            regularization,
-            dtype=dtype,
-            num_threads=num_threads,
-            iterations=iterations,
-            verify_negative_samples=verify_negative_samples,
-            random_state=random_state,
-        )
+    return implicit.cpu.bpr.BayesianPersonalizedRanking(
+        factors,
+        learning_rate,
+        regularization,
+        dtype=dtype,
+        num_threads=num_threads,
+        iterations=iterations,
+        verify_negative_samples=verify_negative_samples,
+        random_state=random_state,
+    )
diff --git a/implicit/cpu/als.py b/implicit/cpu/als.py
index c2d15479..11bbbe64 100644
--- a/implicit/cpu/als.py
+++ b/implicit/cpu/als.py
@@ -69,7 +69,7 @@ def __init__(
         random_state=None,
     ):
 
-        super(AlternatingLeastSquares, self).__init__()
+        super().__init__()
 
         # parameters on how to factorize
         self.factors = factors
@@ -260,15 +260,15 @@ def explain(self, userid, user_items, itemid, user_weights=None, N=10):
         total_score = 0.0
         h = []
         h_len = 0
-        for itemid, confidence in nonzeros(user_items, userid):
+        for other_itemid, confidence in nonzeros(user_items, userid):
             if confidence < 0:
                 continue
-            factor = self.item_factors[itemid]
+            factor = self.item_factors[other_itemid]
             # s_u^ij = (y_i^t W^u) y_j
             score = weighted_item.dot(factor) * confidence
             total_score += score
-            contribution = (score, itemid)
+            contribution = (score, other_itemid)
             if h_len < N:
                 heapq.heappush(h, contribution)
                 h_len += 1
@@ -384,7 +384,7 @@ def least_squares_cg(Cui, X, Y, regularization, num_threads=0, cg_steps=3):
         if rsold < 1e-20:
             continue
 
-        for it in range(cg_steps):
+        for _ in range(cg_steps):
             # calculate Ap = YtCuYp - without actually calculating YtCuY
             Ap = YtY.dot(p)
             for i, confidence in nonzeros(Cui, u):
diff --git a/implicit/cpu/bpr.pyx b/implicit/cpu/bpr.pyx
index fee32204..285abe9f 100644
--- a/implicit/cpu/bpr.pyx
+++ b/implicit/cpu/bpr.pyx
@@ -176,7 +176,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
 
         # we accept num_threads = 0 as indicating to create as many threads as we have cores,
         # but in that case we need the number of cores, since we need to initialize RNG state per
-        # thread. Get the appropiate value back from openmp
+        # thread. Get the appropriate value back from openmp
         cdef int num_threads = self.num_threads
         if not num_threads:
             num_threads = multiprocessing.cpu_count()
diff --git a/implicit/cpu/matrix_factorization_base.py b/implicit/cpu/matrix_factorization_base.py
index d905bc2e..85724d93 100644
--- a/implicit/cpu/matrix_factorization_base.py
+++ b/implicit/cpu/matrix_factorization_base.py
@@ -120,18 +120,14 @@ def _user_factor(self, userid, user_items, recalculate_user=False):
         if recalculate_user:
             if np.isscalar(userid):
                 return self.recalculate_user(userid, user_items)
-            else:
-                return np.stack([self.recalculate_user(i, user_items) for i in userid])
-
+            return np.stack([self.recalculate_user(i, user_items) for i in userid])
         return self.user_factors[userid]
 
     def _item_factor(self, itemid, react_users, recalculate_item=False):
         if recalculate_item:
             if np.isscalar(itemid):
                 return self.recalculate_item(itemid, react_users)
-            else:
-                return np.stack([self.recalculate_item(i, react_users) for i in itemid])
-
+            return np.stack([self.recalculate_item(i, react_users) for i in itemid])
         return self.item_factors[itemid]
 
     def recalculate_user(self, userid, user_items):
diff --git a/implicit/cpu/topk.pyx b/implicit/cpu/topk.pyx
index 7b0442ff..98725792 100644
--- a/implicit/cpu/topk.pyx
+++ b/implicit/cpu/topk.pyx
@@ -20,7 +20,7 @@ def topk(items, query, int k, item_norms=None, filter_query_items=None, filter_i
     indices = np.zeros((query_rows, k), dtype="int32")
     distances = np.zeros((query_rows, k), dtype=query.dtype)
 
-    # TODO: figure out appropiate batch size from available memory
+    # TODO: figure out appropriate batch size from available memory
     cdef int batch_size = 100  # TODO
     cdef int batches = (query_rows / batch_size)
diff --git a/implicit/datasets/lastfm.py b/implicit/datasets/lastfm.py
index c9cc567f..4ad08a3a 100644
--- a/implicit/datasets/lastfm.py
+++ b/implicit/datasets/lastfm.py
@@ -36,7 +36,7 @@ def generate_dataset(filename, outputfilename):
     http://ocelma.net/MusicRecommendationDataset/lastfm-360K.html
 
     You shouldn't have to run this yourself, and can instead just download the
-    output using the 'get_lastfm' funciton./
+    output using the 'get_lastfm' function./
 
     Note there are some invalid entries in this dataset, running this function
     will clean it up so pandas can read it:
diff --git a/implicit/datasets/million_song_dataset.py b/implicit/datasets/million_song_dataset.py
index aecfdf9f..e0dc0493 100644
--- a/implicit/datasets/million_song_dataset.py
+++ b/implicit/datasets/million_song_dataset.py
@@ -54,7 +54,7 @@ def generate_dataset(
     https://labrosa.ee.columbia.edu/millionsong/pages/getting-dataset
 
     You shouldn't have to run this yourself, and can instead just download the
-    output using the 'get_msd_taste_profile' funciton
+    output using the 'get_msd_taste_profile' function
     """
     data = _read_triplets_dataframe(triplets_filename)
     track_info = _join_summary_file(data, summary_filename)
diff --git a/implicit/datasets/movielens.py b/implicit/datasets/movielens.py
index a5cff616..26688659 100644
--- a/implicit/datasets/movielens.py
+++ b/implicit/datasets/movielens.py
@@ -30,7 +30,7 @@ def get_movielens(variant="20m"):
         A sparse matrix where the row is the movieId, the column is the userId
         and the value is the rating.
     """
-    filename = "movielens_%s.hdf5" % variant
+    filename = f"movielens_{variant}.hdf5"
 
     path = os.path.join(_download.LOCAL_CACHE_DIR, filename)
     if not os.path.isfile(path):
""" - filename = "movielens_%s.hdf5" % variant + filename = f"movielens_{variant}.hdf5" path = os.path.join(_download.LOCAL_CACHE_DIR, filename) if not os.path.isfile(path): @@ -50,9 +50,9 @@ def generate_dataset(path, variant="20m", outputpath="."): https://grouplens.org/datasets/movielens/20m/ You shouldn't have to run this yourself, and can instead just download the - output using the 'get_movielens' funciton./ + output using the 'get_movielens' function./ """ - filename = os.path.join(outputpath, "movielens_%s.hdf5" % variant) + filename = os.path.join(outputpath, f"movielens_{variant}.hdf5") if variant == "20m": ratings, movies = _read_dataframes_20M(path) diff --git a/implicit/datasets/reddit.py b/implicit/datasets/reddit.py index 297026c3..f3483c0a 100644 --- a/implicit/datasets/reddit.py +++ b/implicit/datasets/reddit.py @@ -40,7 +40,7 @@ def generate_dataset(filename, outputfilename): https://www.reddit.com/r/redditdev/comments/dtg4j/want_to_help_reddit_build_a_recommender_a_public/ You shouldn't have to run this yourself, and can instead just download the - output using the 'get_reddit' funciton. + output using the 'get_reddit' function. """ data = _read_dataframe(filename) _hfd5_from_dataframe(data, outputfilename) diff --git a/implicit/gpu/als.py b/implicit/gpu/als.py index 0f380ead..b11df189 100644 --- a/implicit/gpu/als.py +++ b/implicit/gpu/als.py @@ -54,7 +54,7 @@ def __init__( if not implicit.gpu.HAS_CUDA: raise ValueError("No CUDA extension has been built, can't train on GPU.") - super(AlternatingLeastSquares, self).__init__() + super().__init__() # parameters on how to factorize self.factors = factors diff --git a/implicit/gpu/bpr.py b/implicit/gpu/bpr.py index 5e6a7322..9ab97451 100644 --- a/implicit/gpu/bpr.py +++ b/implicit/gpu/bpr.py @@ -54,7 +54,7 @@ def __init__( verify_negative_samples=True, random_state=None, ): - super(BayesianPersonalizedRanking, self).__init__() + super().__init__() if not implicit.gpu.HAS_CUDA: raise ValueError("No CUDA extension has been built, can't train on GPU.") @@ -142,10 +142,10 @@ def fit(self, user_items, show_progress=True): ) progress.update(1) total = len(user_items.data) - if total != 0 and total != skipped: + if total and total != skipped: progress.set_postfix( { - "train_auc": "%.2f%%" % (100.0 * correct / (total - skipped)), - "skipped": "%.2f%%" % (100.0 * skipped / total), + "train_auc": f"{100.0 * correct / (total - skipped):0.2f}%", + "skipped": f"{100.0 * skipped / total:0.2f}%", } ) diff --git a/implicit/gpu/matrix_factorization_base.py b/implicit/gpu/matrix_factorization_base.py index f895425c..500c2cd4 100644 --- a/implicit/gpu/matrix_factorization_base.py +++ b/implicit/gpu/matrix_factorization_base.py @@ -26,6 +26,8 @@ def __init__(self): self.user_factors = None self._item_norms = None self._user_norms = None + self._user_norms_host = None + self._item_norms_host = None self._knn = implicit.gpu.KnnQuery() def recommend( diff --git a/implicit/gpu/utils.cuh b/implicit/gpu/utils.cuh index e71d64f8..5adadad3 100644 --- a/implicit/gpu/utils.cuh +++ b/implicit/gpu/utils.cuh @@ -86,7 +86,7 @@ float dot(float a, float b, float * shared) { float val = a * b ; val = warp_reduce_sum(val); - // write out the partial reduction to shared memory if appropiate + // write out the partial reduction to shared memory if appropriate if (lane == 0) { shared[warp] = val; } diff --git a/implicit/lmf.pyx b/implicit/lmf.pyx index 51022243..279eceec 100644 --- a/implicit/lmf.pyx +++ b/implicit/lmf.pyx @@ -113,7 +113,7 @@ class 
diff --git a/implicit/nearest_neighbours.py b/implicit/nearest_neighbours.py
index 1a9e8b7d..482c6ccb 100644
--- a/implicit/nearest_neighbours.py
+++ b/implicit/nearest_neighbours.py
@@ -168,7 +168,7 @@ class BM25Recommender(ItemItemRecommender):
     """An Item-Item Recommender on BM25 distance between items"""
 
     def __init__(self, K=20, K1=1.2, B=0.75, num_threads=0):
-        super(BM25Recommender, self).__init__(K, num_threads)
+        super().__init__(K, num_threads)
         self.K1 = K1
         self.B = B
diff --git a/implicit/recommender_base.py b/implicit/recommender_base.py
index 06b3ef82..c9868f88 100644
--- a/implicit/recommender_base.py
+++ b/implicit/recommender_base.py
@@ -7,7 +7,7 @@ class ModelFitError(Exception):
     pass
 
 
-class RecommenderBase(object):
+class RecommenderBase:
     """Defines the interface that all recommendations models here expose"""
 
     __metaclass__ = ABCMeta
@@ -54,7 +54,7 @@ def recommend(
             The number of results to return
         filter_already_liked_items: bool, optional
             When true, don't return items present in the training set that were rated
-            by the specificed user.
+            by the specified user.
         filter_items : sequence of ints, optional
             List of extra item ids to filter out from the output
         recalculate_user : bool, optional
@@ -93,7 +93,6 @@ def similar_users(self, userid, N=10):
 
     @abstractmethod
     def similar_items(self, itemid, N=10, react_users=None, recalculate_item=False):
-
         """
         Calculates a list of similar items
diff --git a/implicit/utils.py b/implicit/utils.py
index 8dedc94f..5c584fb4 100644
--- a/implicit/utils.py
+++ b/implicit/utils.py
@@ -17,7 +17,7 @@ def check_blas_config():
     """checks to see if using OpenBlas/Intel MKL. If so, warn if the number of threads
     isn't set to 1 (causes severe perf issues when training - can be 10x slower)"""
     # don't warn repeatedly
-    global _checked_blas_config
+    global _checked_blas_config  # pylint: disable=global-statement
     if _checked_blas_config:
         return
     _checked_blas_config = True
diff --git a/setup.cfg b/setup.cfg
index 64b9d377..4bb5ac10 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -22,6 +22,11 @@ known_third_party = scipy,annoy,numpy,cython,pandas
 line_length = 100
 skip = build,.eggs,.tox
 
+[codespell]
+skip = ./.git,./.github,./build,./dist,./docs/build,.*egg-info.*,*.csv,*.tsv
+ignore-words-list = als,coo,nd,unparseable,compiletime
+
+
 [bumpversion:file:implicit/__init__.py]
 
 [bumpversion:file:setup.py]
diff --git a/tests/recommender_base_test.py b/tests/recommender_base_test.py
index 8c431d6f..29fb9ced 100644
--- a/tests/recommender_base_test.py
+++ b/tests/recommender_base_test.py
@@ -38,7 +38,7 @@ def test_recommend(self):
         model.fit(item_users, show_progress=False)
 
         for userid in range(50):
-            ids, scores = model.recommend(userid, user_items, N=1)
+            ids, _ = model.recommend(userid, user_items, N=1)
             self.assertEqual(len(ids), 1)
 
             # the top item recommended should be the same as the userid:
@@ -49,12 +49,12 @@ def test_recommend(self):
         # try asking for more items than possible,
         # should return only the available items
         # https://github.com/benfred/implicit/issues/22
-        ids, scores = model.recommend(0, user_items, N=10000)
+        ids, _ = model.recommend(0, user_items, N=10000)
         self.assertTrue(len(ids))
 
         # filter recommended items using an additional filter list
         # https://github.com/benfred/implicit/issues/26
-        ids, scores = model.recommend(0, user_items, N=1, filter_items=[0])
+        ids, _ = model.recommend(0, user_items, N=1, filter_items=[0])
         self.assertTrue(0 not in set(ids))
 
     def test_recommend_batch(self):
@@ -95,7 +95,7 @@ def test_recalculate_user(self):
                 np.arange(50), user_items, N=1, recalculate_user=True
             )
         except NotImplementedError:
-            # some models don't support recalculating user on the fly, and thats ok
+            # some models don't support recalculating user on the fly, and that's ok
             return
 
         for userid in range(item_users.shape[1]):
@@ -195,7 +195,7 @@ def check_results(ids):
             )
             check_results(ids)
         except NotImplementedError:
-            # some models don't support recalculating user on the fly, and thats ok
+            # some models don't support recalculating user on the fly, and that's ok
             pass
 
     def test_zero_length_row(self):
@@ -236,7 +236,7 @@ def test_rank_items(self):
         for userid in range(50):
             selected_items = random.sample(range(50), 10)
 
-            ids, scores = model.recommend(
+            ids, _ = model.recommend(
                 userid, user_items, items=selected_items, filter_already_liked_items=False
             )
 
@@ -262,7 +262,7 @@ def test_rank_items_batch(self):
         model.fit(item_users, show_progress=False)
 
         selected_items = np.arange(10) * 3
-        ids, scores = model.recommend(np.arange(50), user_items, items=selected_items)
+        ids, _ = model.recommend(np.arange(50), user_items, items=selected_items)
 
         for userid in range(50):
             current_ids = ids[userid]
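
The two new CI lint steps reduce to a pair of commands that can also be run from the
repository root; a minimal sketch, assuming the linters are installed:

    pip install pylint codespell
    codespell          # reads the [codespell] section added to setup.cfg above
    pylint implicit    # picks up the new .pylintrc for its disabled checks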