Rewritten model
lyriccoder committed Jun 15, 2020
1 parent 8ec3e6d commit 11e49e5
Showing 2 changed files with 48 additions and 62 deletions.
aibolit/__main__.py (11 changes: 5 additions & 6 deletions)
@@ -66,13 +66,12 @@ def list_dir(path, files):

 def predict(input_params, model, args):
     features_order = model.features_conf['features_order']
-    # load model
-    input = [input_params[i] for i in features_order]
+    # add ncss to last column. We will normalize all patterns by that value
+    input = [input_params[i] for i in features_order] + [input_params['M2']]
     th = float(args.threshold) or 1.0
-    # preds, importances = model.predict(np.array(input))
-    preds, importances = model.informative(np.array(input))
+    preds, importances = model.informative(np.array(input), th=th)

-    return {features_order[int(x)]: int(x) for x in preds}, importances
+    return {features_order[int(x)]: int(x) for x in preds[0]}, importances


def run_parse_args(commands_dict):
@@ -217,7 +216,7 @@ def inference(
         code_lines = code_lines_dict.get('lines_' + key)
         importance = importances[iter]
         # We show only patterns with positive importance
-        if code_lines and importance > 0.00000e-20:
+        if code_lines and importance > 0:
             if code_lines:
                 pattern_name = \
                     [x['name'] for x in Config.get_patterns_config()['patterns']
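
For context on the new call flow: predict() now appends ncss (pattern 'M2') as the last element of the feature vector, passes the user-supplied threshold (args.threshold) down to Model.informative(), and unwraps preds[0] because informative() now returns a ranking per snippet. Below is a minimal, self-contained sketch of that flow; FakeModel, its toy ranking rule, and the threshold handling are illustrative assumptions, not aibolit's trained model:

import numpy as np


class FakeModel:
    """Stand-in for aibolit's trained model; informative() is assumed to return
    (ranked pattern indices, importances) as in the diff above."""

    features_conf = {'features_order': ['P1', 'P2', 'P3']}

    def informative(self, x, th=1.0):
        x = np.expand_dims(x, axis=0)
        # toy ranking: order patterns by their (non-ncss) counts, descending
        order = np.argsort(-x[0][:-1])
        return [list(order)], [float(v) for v in x[0][:-1]]


def predict(input_params, model, threshold=None):
    features_order = model.features_conf['features_order']
    # ncss ('M2') goes into the last column; informative() normalizes by it
    vector = [input_params[i] for i in features_order] + [input_params['M2']]
    th = float(threshold) if threshold is not None else 1.0
    preds, importances = model.informative(np.array(vector), th=th)
    return {features_order[int(x)]: int(x) for x in preds[0]}, importances


print(predict({'P1': 2, 'P2': 0, 'P3': 5, 'M2': 40}, FakeModel(), threshold=0.8))
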
aibolit/model/model.py (99 changes: 43 additions & 56 deletions)
@@ -1,3 +1,4 @@
+from decimal import localcontext, ROUND_DOWN, Decimal
 from typing import List

 import numpy as np
@@ -98,11 +99,13 @@ def fit(self, X, y, display=False):
         self.model = model
         self.model.fit(X, y.ravel(), logging_level='Silent')

-    def __get_pairs(self, item, th: float):
+    def __get_pairs(self, item, th: float, feature_importances=None):
         def sigmoid(x):
             return 1 / (1 + np.exp(-x))

-        pattern_importances = item * self.model.feature_importances_
+        if not feature_importances:
+            feature_importances = self.model.feature_importances_
+        pattern_importances = item * feature_importances
         # mask discards not significant patterns
         th_mask = (sigmoid(pattern_importances) <= th) + 0
         pattern_importances = pattern_importances * th_mask
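
To make the reworked __get_pairs() concrete: each pattern occurrence count is weighted by a feature importance (CatBoost's own importances, or the ones passed in by informative()), and a sigmoid mask zeroes out scores above the sensitivity th. A toy sketch of that masking step follows; the numbers, the default importances, and the final (scores, indices) pairing are assumptions, since the rest of the method is not shown in this hunk:

import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def get_pairs(item, th, feature_importances=None):
    if not feature_importances:
        feature_importances = [0.5, 0.1, 0.9, 0.3]   # made-up default importances
    # weight pattern occurrences by importance
    pattern_importances = item * np.asarray(feature_importances)
    # mask discards not significant patterns
    th_mask = (sigmoid(pattern_importances) <= th) + 0
    pattern_importances = pattern_importances * th_mask
    # assumed pairing of scores with pattern indices (the real method continues here)
    return pattern_importances, np.arange(len(item))


scores, idx = get_pairs(np.array([0.0, 2.0, 1.0, 4.0]), th=0.7)
print(scores, idx)
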
@@ -112,47 +115,7 @@ def sigmoid(x):
     def __vstack_arrays(self, res):
         return np.vstack(res).T

-    def predict1(self, X, return_acts=False, quantity_func='log', th=1.0):
-        """
-        Args:
-            X: np.array with shape (number of snippets, number of patterns) or
-                (number of patterns, ).
-            quantity_func: str, type of function that will be applied to
-                number of occurrences.
-            th (float): Sensitivity of algorithm to recommend.
-                0 - ignore all recomendations
-                1 - use all recommendations
-        Returns:
-            ranked: np.array with shape (number of snippets, number of patterns)
-                of sorted patterns in non-increasing order for each snippet of
-                code.
-        """
-
-        if X.ndim == 1:
-            X = X.copy()
-            X = np.expand_dims(X, axis=0)
-
-        ranked = []
-        quantity_funcs = {
-            'log': lambda x: np.log1p(x) / np.log(10),
-            'exp': lambda x: np.exp(x + 1),
-            'linear': lambda x: x,
-        }
-
-        for snippet in X:
-            try:
-                item = quantity_funcs[quantity_func](snippet)
-                pairs = self.__vstack_arrays(self.__get_pairs(item, th))
-                pairs = pairs[pairs[:, 0].argsort()]
-                ranked.append(pairs[:, 1].T.tolist()[::-1])
-            except Exception:
-                raise Exception("Unknown func")
-
-        if not return_acts:
-            return (np.array(ranked), pairs[:, 0].T.tolist()[::-1])
-        return np.array(ranked), pairs[:, 0].T.tolist()[::-1], np.zeros(X.shape[0]) - 1
-
-    def predict(self, X, quantity_func='log', th=1.0):
+    def calculate_score(self, X, quantity_func='log', th=1.0, feature_importances=None):
         """
         Args:
             X: np.array with shape (number of snippets, number of patterns) or
@@ -185,15 +148,16 @@ def predict(self, X, quantity_func='log', th=1.0):
         for snippet in X:
             try:
                 item = quantity_funcs[quantity_func](snippet)
-                pairs = self.__vstack_arrays(self.__get_pairs(item, th))
+                pairs = self.__vstack_arrays(self.__get_pairs(item, th, feature_importances))
                 pairs = pairs[pairs[:, 0].argsort()]
                 ranked.append(pairs[:, 1].T.tolist()[::-1])
-            except Exception:
+            except Exception as e:
+                import traceback
+                traceback.print_exc()
                 raise Exception("Unknown func")

         return (np.array(ranked), pairs[:, 0].T.tolist()[::-1])

-
     def get_array(self, X, mask, i, incr):
         """
         Args:
@@ -206,7 +170,7 @@ def get_array(self, X, mask, i, incr):
"""

X1 = X.copy()
X1[:, i][mask[:, i]] += incr
X1[:, i][mask[:, i]] += incr[mask[:, i]]

return X1

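The small change to get_array() matters because incr is no longer a scalar: informative() below passes -1 / ncss, i.e. one decrement per snippet, so only the masked rows should receive their own element of incr. A toy illustration with invented numbers:

import numpy as np


def get_array(X, mask, i, incr):
    # decrement column i only where the pattern occurs; incr may be a per-row array
    X1 = X.copy()
    X1[:, i][mask[:, i]] += incr[mask[:, i]]
    return X1


X = np.array([[2.0, 0.0], [3.0, 1.0]])   # two snippets, two patterns (toy data)
mask = X > 0
ncss = np.array([10.0, 20.0])
# each snippet loses one occurrence of pattern 0, scaled by its own ncss
print(get_array(X, mask, 0, -1 / ncss))
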
@@ -225,7 +189,7 @@ def get_minimum(self, c1, c2, c3):

         return np.min(c, 0), np.argmin(c, 0)

-    def informative(self, X):
+    def informative(self, X, scale=True, th=1.0):
         """
         Args:
             X: np.array with shape (number of snippets, number of patterns) or
@@ -240,17 +204,40 @@
"""
X = X.copy()
X = np.expand_dims(X, axis=0)
# remember it, since we will use `log` function for non-normalized input value
X_old = X[:, :-1]

ncss = X[:, -1]
if scale:
X = X[:, :-1] / ncss.reshape(-1, 1)
else:
X = X[:, :-1]

k = X.shape[1]
complexity = self.model.predict(X)
mask = X > 0
importances = []
print(f' complexity: {complexity}')
for i in range(k):
complexity_minus = self.model.predict(self.get_array(X, mask, i, -1))
diff = complexity - complexity_minus
if i == 11:
print()
importances.append((i, diff[0]))

sorted_arr = sorted(importances, key=lambda x: x[1], reverse=True)
return [x[0] for x in sorted_arr], [x[1] for x in sorted_arr]
if X[0][i] == 0:
# do not need to predict if we have 0
importances.append((i, 0))
continue
complexity_minus = self.model.predict(self.get_array(X, mask, i, -1 / ncss))
if complexity_minus < complexity:
# complexity decreased
with localcontext() as ctx:
ctx.rounding = ROUND_DOWN
abs_diff = abs(complexity - complexity_minus)[0]
diff = Decimal(abs_diff).quantize(Decimal('0.001'))
diff = float(diff * 100)
elif complexity_minus > complexity:
# complexity increased, we do not need such value, set to 0,
# cause we need only patterns when complexity decreased
diff = 0
else:
diff = 0
importances.append((i, diff))

only_importances = list(np.array(importances).T[1])
return self.calculate_score(X_old[0], th=th, feature_importances=only_importances)
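
Taken together, the rewritten informative() normalizes pattern counts by ncss, removes one occurrence of each present pattern in turn, keeps only the cases where the predicted complexity drops (rounding the drop down to three decimals and scaling by 100), and finally hands those importances to calculate_score() for ranking. The following is a condensed, self-contained approximation of that leave-one-pattern-out loop, with a trivial stand-in for the trained CatBoost regressor and without the final ranking step:

from decimal import Decimal, ROUND_DOWN, localcontext

import numpy as np


def pattern_importances(predict, row_with_ncss):
    # last column is ncss; pattern counts are normalized by it before prediction
    ncss = row_with_ncss[-1]
    x = row_with_ncss[:-1] / ncss
    base = predict(x)
    importances = []
    for i in range(len(x)):
        if x[i] == 0:
            importances.append(0.0)      # pattern absent, nothing to remove
            continue
        x_minus = x.copy()
        x_minus[i] -= 1 / ncss           # drop one occurrence of pattern i
        lowered = predict(x_minus)
        if lowered < base:
            with localcontext() as ctx:
                ctx.rounding = ROUND_DOWN
                diff = Decimal(abs(base - lowered)).quantize(Decimal('0.001'))
            importances.append(float(diff * 100))
        else:
            importances.append(0.0)      # complexity did not decrease
    return importances


# toy "model": complexity grows with the weighted sum of normalized pattern counts
weights = np.array([4.0, 0.0, 2.5])
print(pattern_importances(lambda x: float(weights @ x),
                          np.array([3.0, 1.0, 2.0, 50.0])))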
