Add pylint and codespell to linters
benfred committed Nov 26, 2021
1 parent 029f11d commit 5c05bf5
Showing 26 changed files with 160 additions and 91 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/build.yml
@@ -3,11 +3,7 @@
 
 name: Build
 
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
+on: [push, pull_request]
 
 jobs:
   build:
@@ -27,7 +23,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 isort cpplint black pytest
+        pip install flake8 isort cpplint black pytest codespell h5py pylint
         pip install -r requirements.txt
     - name: Lint with flake8
       run: |
@@ -42,6 +38,12 @@
     - name: Lint with isort
       run: |
         isort -c .
+    - name: Lint with codespell
+      run: |
+        codespell
+    - name: Lint with pylint
+      run: |
+        pylint implicit
     - name: Build
       run: |
         python setup.py develop
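The two new lint steps can be reproduced outside CI with the same commands the workflow runs — a sketch, assuming the repository root as the working directory and the dependencies from the pip install line above:

```
# install the linters added to the CI dependency list
pip install codespell pylint h5py
# spell-check the tree, then lint the implicit package
codespell
pylint implicit
```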
22 changes: 22 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,22 @@
+repos:
+  - repo: https://github.com/timothycrosley/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        additional_dependencies: [toml]
+  - repo: https://github.com/python/black
+    rev: 21.11b1
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+  - repo: https://github.com/pycqa/pylint
+    rev: v2.12.1
+    hooks:
+      - id: pylint
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
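These hooks mirror the CI linters so problems are caught at commit time. The file is consumed by the pre-commit tool, which is not in the workflow's pip install list, so installing it is assumed here; a typical setup:

```
pip install pre-commit
# register the git hook so the linters run on every commit
pre-commit install
# or run every hook against the whole repository once
pre-commit run --all-files
```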
46 changes: 46 additions & 0 deletions .pylintrc
@@ -0,0 +1,46 @@
+[MASTER]
+
+extension-pkg-whitelist=implicit.cpu._als,implicit._nearest_neighbours,implicit.gpu._cuda,implicit.cpu.bpr,implicit.cpu.topk,numpy.random.mtrand
+
+[MESSAGES CONTROL]
+disable=fixme,
+        missing-function-docstring,
+        missing-module-docstring,
+        missing-class-docstring,
+        wrong-import-order,
+        wrong-import-position,
+        ungrouped-imports,
+        line-too-long,
+        superfluous-parens,
+        trailing-whitespace,
+        invalid-name,
+        import-error,
+        no-self-use,
+
+        # disable code-complexity check
+        too-many-function-args,
+        too-many-instance-attributes,
+        too-many-locals,
+        too-many-branches,
+        too-many-nested-blocks,
+        too-many-statements,
+        too-many-arguments,
+        too-many-return-statements,
+        too-many-lines,
+        too-few-public-methods,
+
+        # TODO: fix underlying errors for these
+        import-outside-toplevel,
+        not-callable,
+        unused-argument,
+        abstract-method,
+        arguments-differ,
+        no-member,
+        no-name-in-module,
+        arguments-renamed,
+        import-self,
+
+[SIMILARITIES]
+min-similarity-lines=16
+ignore-docstrings=yes
+ignore-imports=yes
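The extension-pkg-whitelist line is what makes pylint usable on this codebase: the listed modules are compiled Cython/CUDA extensions that pylint cannot statically inspect, so whitelisting lets it import them to discover their members rather than reporting spurious errors. A quick local check against the new config — a sketch; pylint also picks .pylintrc up automatically from the working directory:

```
# point pylint at the new config explicitly
pylint --rcfile=.pylintrc implicit
```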
2 changes: 1 addition & 1 deletion README.md
@@ -93,7 +93,7 @@ which can be installed with homebrew: ```brew install gcc```. Running on Windows
 3.5+.
 
 GPU Support requires at least version 11 of the [NVidia CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). The build will use the ```nvcc``` compiler
-that is found on the path, but this can be overriden by setting the CUDAHOME enviroment variable
+that is found on the path, but this can be overridden by setting the CUDAHOME environment variable
 to point to your cuda installation.
 
 This library has been tested with Python 3.6, 3.7, 3.8 and 3.9 on Ubuntu, OSX and Windows.
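A from-source build against a specific CUDA install might then look like the following sketch, with a hypothetical toolkit path:

```
# hypothetical CUDA location; adjust to your installation
export CUDAHOME=/usr/local/cuda-11.4
python setup.py develop
```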
2 changes: 1 addition & 1 deletion benchmarks/README.md
@@ -66,4 +66,4 @@ Note that this dataset was filtered down for all versions to reviews that were p
 stars), to simulate a truly implicit dataset.
 
 Implicit on the CPU seems to suffer a bit here relative to the other options. It seems like there might
-be a single threaded bottleneck at some point thats worth examining later.
+be a single threaded bottleneck at some point that's worth examining later.
23 changes: 11 additions & 12 deletions implicit/als.py
@@ -61,15 +61,14 @@ def AlternatingLeastSquares(
             calculate_training_loss=calculate_training_loss,
             random_state=random_state,
         )
-    else:
-        return implicit.cpu.als.AlternatingLeastSquares(
-            factors,
-            regularization,
-            dtype,
-            use_native,
-            use_cg,
-            iterations,
-            calculate_training_loss,
-            num_threads,
-            random_state,
-        )
+    return implicit.cpu.als.AlternatingLeastSquares(
+        factors,
+        regularization,
+        dtype,
+        use_native,
+        use_cg,
+        iterations,
+        calculate_training_loss,
+        num_threads,
+        random_state,
+    )
43 changes: 21 additions & 22 deletions implicit/approximate_als.py
@@ -69,13 +69,13 @@ class NMSLibAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         method="hnsw",
         index_params=None,
         query_params=None,
         random_state=None,
-        *args,
         **kwargs
     ):
         if index_params is None:
@@ -93,9 +93,9 @@ def __init__(
         self.index_params = index_params
         self.query_params = query_params
 
-        super(NMSLibAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        self.max_norm = None
+
+        super().__init__(*args, random_state=random_state, **kwargs)
 
     def fit(self, Cui, show_progress=True):
         # nmslib can be a little chatty when first imported, disable some of
@@ -104,7 +104,7 @@ def fit(self, Cui, show_progress=True):
         import nmslib
 
         # train the model
-        super(NMSLibAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         # create index for similar_items
         if self.approximate_similar_items:
@@ -137,7 +137,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items:
-            return super(NMSLibAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         neighbours, distances = self.similar_items_index.knnQuery(self.item_factors[itemid], N)
         return zip(neighbours, 1.0 - distances)
@@ -152,7 +152,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(NMSLibAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -216,21 +216,20 @@ class AnnoyAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         n_trees=50,
         search_k=-1,
         random_state=None,
-        *args,
         **kwargs
     ):
 
-        super(AnnoyAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        super().__init__(*args, random_state=random_state, **kwargs)
 
         self.similar_items_index = None
         self.recommend_index = None
+        self.max_norm = None
 
         self.approximate_similar_items = approximate_similar_items
         self.approximate_recommend = approximate_recommend
@@ -243,7 +242,7 @@ def fit(self, Cui, show_progress=True):
         import annoy
 
         # train the model
-        super(AnnoyAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         # build up an Annoy Index with all the item_factors (for calculating
         # similar items)
@@ -267,7 +266,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items:
-            return super(AnnoyAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         neighbours, dist = self.similar_items_index.get_nns_by_item(
             itemid, N, search_k=self.search_k, include_distances=True
@@ -285,7 +284,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(AnnoyAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -353,18 +352,20 @@ class FaissAlternatingLeastSquares(AlternatingLeastSquares):
 
     def __init__(
         self,
+        *args,
         approximate_similar_items=True,
         approximate_recommend=True,
         nlist=400,
         nprobe=20,
         use_gpu=implicit.gpu.HAS_CUDA,
         random_state=None,
-        *args,
         **kwargs
     ):
 
         self.similar_items_index = None
         self.recommend_index = None
+        self.quantizer = None
+        self.gpu_resources = None
 
         self.approximate_similar_items = approximate_similar_items
         self.approximate_recommend = approximate_recommend
@@ -373,15 +374,13 @@ def __init__(
         self.nlist = nlist
         self.nprobe = nprobe
         self.use_gpu = use_gpu
-        super(FaissAlternatingLeastSquares, self).__init__(
-            *args, random_state=random_state, **kwargs
-        )
+        super().__init__(*args, random_state=random_state, **kwargs)
 
     def fit(self, Cui, show_progress=True):
         import faiss
 
         # train the model
-        super(FaissAlternatingLeastSquares, self).fit(Cui, show_progress)
+        super().fit(Cui, show_progress)
 
         self.quantizer = faiss.IndexFlat(self.factors)
 
@@ -433,7 +432,7 @@ def fit(self, Cui, show_progress=True):
 
     def similar_items(self, itemid, N=10):
         if not self.approximate_similar_items or (self.use_gpu and N >= 1024):
-            return super(FaissAlternatingLeastSquares, self).similar_items(itemid, N)
+            return super().similar_items(itemid, N)
 
         factors = self.item_factors[itemid]
         factors /= numpy.linalg.norm(factors)
@@ -452,7 +451,7 @@ def recommend(
         recalculate_user=False,
     ):
         if not self.approximate_recommend:
-            return super(FaissAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
@@ -474,7 +473,7 @@ def recommend(
         # the GPU variant of faiss doesn't support returning more than 1024 results.
         # fall back to the exact match when this happens
         if self.use_gpu and count >= 1024:
-            return super(FaissAlternatingLeastSquares, self).recommend(
+            return super().recommend(
                 userid,
                 user_items,
                 N=N,
21 changes: 10 additions & 11 deletions implicit/bpr.py
@@ -60,14 +60,13 @@ def BayesianPersonalizedRanking(
             verify_negative_samples=verify_negative_samples,
             random_state=random_state,
         )
-    else:
-        return implicit.cpu.bpr.BayesianPersonalizedRanking(
-            factors,
-            learning_rate,
-            regularization,
-            dtype=dtype,
-            num_threads=num_threads,
-            iterations=iterations,
-            verify_negative_samples=verify_negative_samples,
-            random_state=random_state,
-        )
+    return implicit.cpu.bpr.BayesianPersonalizedRanking(
+        factors,
+        learning_rate,
+        regularization,
+        dtype=dtype,
+        num_threads=num_threads,
+        iterations=iterations,
+        verify_negative_samples=verify_negative_samples,
+        random_state=random_state,
+    )
10 changes: 5 additions & 5 deletions implicit/cpu/als.py
@@ -69,7 +69,7 @@ def __init__(
         random_state=None,
     ):
 
-        super(AlternatingLeastSquares, self).__init__()
+        super().__init__()
 
         # parameters on how to factorize
         self.factors = factors
@@ -260,15 +260,15 @@ def explain(self, userid, user_items, itemid, user_weights=None, N=10):
         total_score = 0.0
         h = []
         h_len = 0
-        for itemid, confidence in nonzeros(user_items, userid):
+        for other_itemid, confidence in nonzeros(user_items, userid):
             if confidence < 0:
                 continue
 
-            factor = self.item_factors[itemid]
+            factor = self.item_factors[other_itemid]
             # s_u^ij = (y_i^t W^u) y_j
             score = weighted_item.dot(factor) * confidence
             total_score += score
-            contribution = (score, itemid)
+            contribution = (score, other_itemid)
             if h_len < N:
                 heapq.heappush(h, contribution)
                 h_len += 1
@@ -384,7 +384,7 @@ def least_squares_cg(Cui, X, Y, regularization, num_threads=0, cg_steps=3):
             if rsold < 1e-20:
                 continue
 
-            for it in range(cg_steps):
+            for _ in range(cg_steps):
                 # calculate Ap = YtCuYp - without actually calculating YtCuY
                 Ap = YtY.dot(p)
                 for i, confidence in nonzeros(Cui, u):
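For context, the comment in this hunk refers to the usual conjugate-gradient trick for implicit-feedback ALS: the confidence matrix C_u equals the identity except at the items user u has interacted with, so the product can be split as

```
Y^T C_u Y p = Y^T Y p + \sum_{i : c_{ui} \neq 0} (c_{ui} - 1) (y_i^T p) y_i
```

The first term reuses the precomputed YtY, and the loop over nonzeros(Cui, u) accumulates the sparse correction.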
2 changes: 1 addition & 1 deletion implicit/cpu/bpr.pyx
@@ -176,7 +176,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
 
         # we accept num_threads = 0 as indicating to create as many threads as we have cores,
         # but in that case we need the number of cores, since we need to initialize RNG state per
-        # thread. Get the appropiate value back from openmp
+        # thread. Get the appropriate value back from openmp
         cdef int num_threads = self.num_threads
         if not num_threads:
             num_threads = multiprocessing.cpu_count()