From 78887a91508d9236f1c338b5ef51818aaf381f81 Mon Sep 17 00:00:00 2001
From: UH <554c46@gmail.com>
Date: Fri, 12 Mar 2021 22:06:36 +0100
Subject: [PATCH 1/7] file renamed

---
 test/{test_ranking.py => test_rank.py} | 6 ++++++
 1 file changed, 6 insertions(+)
 rename test/{test_ranking.py => test_rank.py} (85%)

diff --git a/test/test_ranking.py b/test/test_rank.py
similarity index 85%
rename from test/test_ranking.py
rename to test/test_rank.py
index 83040f3..b36c7d3 100644
--- a/test/test_ranking.py
+++ b/test/test_rank.py
@@ -26,6 +26,12 @@ def test1():
         {"method": "pvalue", "avg": "exist", "calibration": "isotonic"},
         {"method": "pvalue", "avg": "all", "calibration": "minmax"},
         {"method": "pvalue", "avg": "exist", "calibration": "minmax"},
+        {"method": "btl", "calibration": "platt"},
+        {"method": "btl", "calibration": "isotonic"},
+        {"method": "btl", "calibration": "minmax"},
+        {"method": "orme", "calibration": "platt"},
+        {"method": "orme", "calibration": "isotonic"},
+        {"method": "orme", "calibration": "minmax"},
         {"method": "eigen", "calibration": "platt"},
         {"method": "eigen", "calibration": "isotonic"},
         {"method": "eigen", "calibration": "minmax"},

From d5d461349164379705718ad47e6fed51027915b2 Mon Sep 17 00:00:00 2001
From: UH <554c46@gmail.com>
Date: Fri, 12 Mar 2021 22:28:40 +0100
Subject: [PATCH 2/7] references added

---
 README.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/README.md b/README.md
index 91b578f..d1d9557 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,10 @@ The output has the following structure

 **Warning**: `len(examples)` must be a multiple of `(n_items - 1)`

+**References:**
+
+- Section 5 (page 4) in: Hamster, U. A. (2021, March 9). Extracting Pairwise Comparisons Data from Best-Worst Scaling Surveys by Logical Inference. [https://doi.org/10.31219/osf.io/qkxej](https://doi.org/10.31219/osf.io/qkxej)
+
 ## Counting

 **Input Data:**
@@ -125,10 +129,28 @@ agg_dok, direct_dok, direct_detail, logical_dok, logical_detail = bws.count(
     logical_dok=logical_dok, logical_detail=logical_detail, logical_database=database)
 ```

+**References:**
+
+- Sections 3-4 in: Hamster, U. A. (2021, March 9). Extracting Pairwise Comparisons Data from Best-Worst Scaling Surveys by Logical Inference. [https://doi.org/10.31219/osf.io/qkxej](https://doi.org/10.31219/osf.io/qkxej)
+
 ## Ranking

+**Input Data:**
+The input data is a Dictionary of Keys (DoK) object produced by `bwsample.count`.
+
+**Call the function:**
+The function `bwsample.rank` returns an index array with the proposed ordering (`ranked`), an ordered list of example IDs (`ordids`), scores (`scores`), and further information depending on the selected `method`.
+
+```python
+import bwsample as bws
+ranked, ordids, scores, info = bws.rank(dok, method='ratio')
+```
+
+**References:**
+
+- Eigenvector solution in: Saaty, T. L. (2003). Decision-making with the AHP: Why is the principal eigenvector necessary. European Journal of Operational Research, 145(1), 85–91. [https://doi.org/10.1016/S0377-2217(02)00227-8](https://doi.org/10.1016/S0377-2217(02)00227-8)
+- Estimating the BTL model in: Hunter, D. R. (2004). MM algorithms for generalized Bradley-Terry models. The Annals of Statistics, 32(1), 384–406. [https://doi.org/10.1214/aos/1079120141](https://doi.org/10.1214/aos/1079120141)
+- MaxDiff score in: Orme, B. (2009). MaxDiff Analysis: Simple Counting, Individual-Level Logit, and HB.
+  [https://sawtoothsoftware.com/uploads/sawtoothsoftware/originals/f89a6537-1cae-4fb5-afad-9d325c2a3143.pdf](https://sawtoothsoftware.com/uploads/sawtoothsoftware/originals/f89a6537-1cae-4fb5-afad-9d325c2a3143.pdf)
+
 ## Appendix

From 532f66d5392e54eb3915eb2f3b999aa02284f757 Mon Sep 17 00:00:00 2001
From: UH <554c46@gmail.com>
Date: Sat, 13 Mar 2021 18:11:10 +0100
Subject: [PATCH 3/7] ranking.py refactored

---
 bwsample/__init__.py     |   5 +-
 bwsample/ranking.py      | 490 +++++++++++++++------------------------
 bwsample/utils.py        |  70 +++++-
 test/test_adjustscore.py |  41 ++++
 test/test_rank.py        |  56 +++--
 5 files changed, 318 insertions(+), 344 deletions(-)
 create mode 100644 test/test_adjustscore.py

diff --git a/bwsample/__init__.py b/bwsample/__init__.py
index 648675a..794bffa 100644
--- a/bwsample/__init__.py
+++ b/bwsample/__init__.py
@@ -2,6 +2,5 @@
 from .sampling import sample
 from .counting import count
-# logical_infer
-from .utils import (to_scipy, add_dok)
-from .ranking import (rank)
+from .ranking import rank
+from .utils import (to_scipy, add_dok, adjustscore)

diff --git a/bwsample/ranking.py b/bwsample/ranking.py
index 535baf6..cef25aa 100644
--- a/bwsample/ranking.py
+++ b/bwsample/ranking.py
@@ -1,74 +1,18 @@
 from typing import List, Dict, Tuple, Optional
 from .utils import to_scipy
+from .utils import adjustscore
 import numpy as np
 import scipy.sparse
 import scipy.sparse.linalg
 import scipy.linalg
 import scipy.stats
-import sklearn.linear_model
-import sklearn.isotonic
-
-
-def minmax(arr: np.array) -> np.array:
-    data = np.array(arr)
-    xmin = data.min()
-    xmax = data.max()
-    return (data - xmin) / (xmax - xmin)
-
-
-def calibrate(scores: np.array,
-              labels: np.array,
-              method: Optional[str] = None) -> np.array:
-    """Wrapper function to calibrate scores with its binary labels
-
-    Parameters:
-    -----------
-    scores: np.array
-        The scores generated by a model. It's assumed that these scores
-        are probabilities with values between [0,1]. For example, apply
-        min-max-scaling for ratio-scale data types (i.e. score>0.0).
-
-    labels: np.array
-        The binary labels that are supposed to be classified by the scores.
-
-    method: str (Default: None)
-        The calibration algorithm:
-        - 'platt' for Platt-Scaling (Platt, 1999)
-        - 'isotonic' for Isotonic Regression (Zadrozny and Elkan, 2002)
-
-    Return:
-    -------
-    calibrated_scores : np.array
-        The predicted probabilities
-
-    References:
-    -----------
-    Platt, J., 1999. Probabilistic outputs for support vector machines and
-        comparisons to regularized likelihood methods.
-
-    Zadrozny, B., Elkan, C., 2002. Transforming classifier scores into
-        accurate multiclass probability estimates, in: Proceedings of the
-        Eighth ACM SIGKDD International Conference on Knowledge Discovery
-        and Data Mining, KDD ’02. Association for Computing Machinery,
-        New York, NY, USA, pp. 694–699.
-        https://doi.org/10.1145/775047.775151
-    """
-    scores = np.array(scores)
-    labels = np.array(labels)
-    if method == 'platt':
-        cls = sklearn.linear_model.LogisticRegression()
-        cls.fit(X=scores.reshape(-1, 1), y=labels)
-        return cls.predict_proba(scores.reshape(-1, 1))[:, 1]
-    elif method == 'isotonic':
-        cls = sklearn.isotonic.IsotonicRegression(out_of_bounds='raise')
-        cls.fit(X=scores, y=labels)
-        return cls.transform(scores)
-    else:
-        return scores


 def rank(dok: Dict[Tuple[str, str], int],
-         method: Optional[str] = 'pvalue', **kwargs):
-    """Rank and score items based on pairwise comparison frequencies
+         method: Optional[str] = 'ratio',
+         adjust: Optional[str] = None,
+         **kwargs) -> (np.array, np.array, np.array, np.array, dict):
+    """Rank items based on pairwise comparison frequencies

     Parameters:
     -----------
     dok : Dict[Tuple[str, str], int]

     method : Optional[str]
         The procedure to compute ranks and scores.
-        - 'ratios'
-        - 'btl'
-        - 'pvalue'
-        - 'orme'
-        - 'eigen'
-        - 'transition'
+        - 'ratio'
+        - 'pvalue'
+        - 'btl'
+        - 'eigen'
+        - 'trans'

     Returns:
     --------
-    ranked : List[int]
+    positions : np.array[uint64]
         The array positions to order/sort the original data by indexing.

-    ordids : List[int]
-        The item IDs in the new order.
+    sortedids : np.array[any]
+        The reordered item IDs
+
+    metrics : np.array[float]
+        The metric for each item ID. Also sorted in descending order.

-    scores : List[float]
-        The scores for each item ID. Also sorted in descending order.
+    scores : np.array[float]
+        Scaled or calibrated metrics (Default: the `metrics` if `adjust=None`)

-    info
-        Output depends on the selected method
+    info : dict
+        Further information depending on the selected `method`

     Example:
     --------
     import bwsample as bws
-    data = (
+    evaluations = (
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([2, 0, 0, 1], ['A', 'B', 'C', 'D']),
         ([0, 1, 2, 0], ['A', 'B', 'C', 'D']),
         ([0, 1, 0, 2], ['A', 'B', 'C', 'D']),
     )
-    dok, _, _, _ = bws.extract_pairs_batch2(data)
-    ranked, ordids, scores, info = bws.ranking(dok, method='pvalue')
+    agg_dok, _, _, _, _ = bws.count(evaluations)
+    positions, sortedids, metrics, scores, info = bws.rank(
+        agg_dok, method='ratio', avg='exist', adjust='quantile')
     """
+    # convert to sparse matrix
     cnt, indices = to_scipy(dok)
-    if method in ('ratios',):
-        return ranking_maximize_ratios(cnt, indices, **kwargs)
+
+    # compute the rankings
+    if method in ('ratio',):
+        positions, sortedids, metrics, info = maximize_ratio(
+            cnt, indices, **kwargs)
     elif method in ('pvalue',):
-        return ranking_minus_pvalues(cnt, indices, **kwargs)
+        positions, sortedids, metrics, info = maximize_minuspvalue(
+            cnt, indices, **kwargs)
-    elif method in ('btl', 'bradley', 'hunter'):
-        return ranking_btl(cnt, indices, **kwargs)
-    elif method in ('orme',):
-        return scoring_orme(cnt, indices, **kwargs)
-    elif method in ('eigen',):
-        return scoring_eigenvector(cnt, indices, **kwargs)
-    elif method in ('transition',):
-        return transition_simulation(cnt, indices, **kwargs)
+    elif method in ('btl', 'hunter'):
+        positions, sortedids, metrics, info = bradley_terry_probability(
+            cnt, indices, **kwargs)
+    elif method in ('eigen', 'saaty'):
+        positions, sortedids, metrics, info = eigenvector_estimation(
+            cnt, indices, **kwargs)
+    elif method in ('trans',):
+        positions, sortedids, metrics, info = transition_simulation(
+            cnt, indices, **kwargs)
     else:
         raise Exception(f"method='{method}' not available.")

+    # adjust scores
+    if adjust is not None:
+        cut = np.median(metrics)
+        labels = [x >= cut for x in metrics]
+        scores = adjustscore(metrics, method=adjust, labels=labels)
+    else:
+        scores = metrics.copy()
+
+    # done
+    return positions, sortedids, metrics, scores, info


-def ranking_maximize_ratios(cnt: scipy.sparse.csr_matrix,
-                            indices: List[str],
-                            avg: Optional[str] = 'exist',
-                            calibration: Optional[str] = 'platt'):
+def maximize_ratio(cnt: scipy.sparse.csr_matrix,
+                   indices: List[str],
+                   avg: Optional[str] = 'exist'):
     """Rank items based on simple ratios, and use row sums as metrics

     Parameters:
     -----------
     cnt : scipy.sparse.dok.dok_matrix
         Quadratic sparse matrix with frequency data

     indices : List[str]
         Identifiers, e.g. UUID4, of each row/column of the `cnt` matrix.

     avg : Optional[str] = 'exist'
         How to compute denominator for averaging.
         - 'all': divide the sum of ratios by the row length
         - 'exist': divide the sum of ratios by the number of ratios
             in the row

     Returns:
     --------
-    ranked : List[int]
+    positions : np.array[uint64]
         The array positions to order/sort the original data by indexing.

-    ordids : List[int]
-        The item IDs in the new order.
+    sortedids : np.array[any]
+        The reordered item IDs

-    scores : List[float]
-        The scores for each item ID. Also sorted in descending order.
+    metrics : np.array[float]
+        The metric for each item ID. Also sorted in descending order.

-    info
-        The matrix with the ratios
+    info : dict
+        Further information depending on the selected `method`

     Example:
     --------
     import bwsample as bws
-    data = (
+    evaluations = (
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([2, 0, 0, 1], ['A', 'B', 'C', 'D']),
         ([0, 1, 2, 0], ['A', 'B', 'C', 'D']),
         ([0, 1, 0, 2], ['A', 'B', 'C', 'D']),
     )
-    dok, _, _, _ = bws.extract_pairs_batch2(data)
-    ranked, ordids, scores, ratios = bws.rank(
-        dok, method='ratio', avg='exist', calibration='platt')
+    agg_dok, _, _, _, _ = bws.count(evaluations)
+    positions, sortedids, metrics, scores, info = bws.rank(
+        agg_dok, method='ratio', avg='exist')
     """
     # compute ratios
     cnt = cnt.tocsr()
     ratios = cnt + cnt.T
     ratios.data = 1.0 / ratios.data
     ratios = ratios.multiply(cnt)

     # sum rows in DoK matrix
-    metric = np.array(ratios.sum(axis=1).flatten())[0]
+    metrics = np.array(ratios.sum(axis=1).flatten())[0]

     # averaging
     if avg == 'all':
-        metric /= len(metric)
+        metrics /= len(metrics)
     elif avg == 'exist':
         ridx, _ = (ratios + ratios.T).nonzero()  # ensure actual 0s are counted
         for i, c in zip(*np.unique(ridx, return_counts=True)):
-            metric[i] /= c
+            metrics[i] /= c

     # sort, larger row sums are better
-    ranked = np.argsort(-metric)  # maximize
-    ordids = np.array(indices)[ranked].tolist()
-    scores = metric[ranked]
+    positions = np.argsort(-metrics)  # maximize
+    sortedids = np.array(indices)[positions]
+    metrics = metrics[positions]

-    # calibrate scores
-    if calibration in ('platt', 'isotonic'):
-        labels = scores > np.mean(scores)  # TRUE: s>mean(s)
-        scores = calibrate(scores, labels, method=calibration)
-    elif calibration == 'minmax':
-        scores = minmax(scores)
+    # further information
+    info = {}

     # done
-    return ranked.tolist(), ordids, scores.tolist(), ratios
+    return positions, sortedids, metrics, info


-def ranking_minus_pvalues(cnt: scipy.sparse.csr_matrix,
-                          indices: List[str],
-                          avg: Optional[str] = 'exist',
-                          calibration: Optional[str] = 'platt'):
+def maximize_minuspvalue(cnt: scipy.sparse.csr_matrix,
+                         indices: List[str],
+                         avg: Optional[str] = 'exist'):
     """Rank based on p-values of Chi-Squared tests between reciprocal
     pairs, and use row sums as metrics

     Parameters:
     -----------
     cnt : scipy.sparse.dok.dok_matrix
         Quadratic sparse matrix with frequency data

     indices : List[str]
         Identifiers, e.g. UUID4, of each row/column of the `cnt` matrix.

     avg : Optional[str]
         How to compute denominator for averaging.
         - 'all': divide the sum of ratios by the row length
         - 'exist': divide the sum of ratios by the number of ratios
             in the row

-    calibration: str (Default: None)
-        The calibrated scores. For 'platt' and 'isotonic' we assume
-        `label[i]=rowsum[i]>mean(rowsum)`.
-
     Returns:
     --------
-    ranked : List[int]
+    positions : np.array[uint64]
         The array positions to order/sort the original data by indexing.

-    ordids : List[int]
-        The item IDs in the new order.
+    sortedids : np.array[any]
+        The reordered item IDs

-    scores : List[float]
-        The scores for each item ID. Also sorted in descending order.
+    metrics : np.array[float]
+        The metric for each item ID. Also sorted in descending order.

-    info
-        The matrix with the `1-p`-values
+    info : dict
+        Further information depending on the selected `method`, e.g.
+        - "P": The matrix with the `1-p`-values

     Example:
     --------
     import bwsample as bws
-    data = (
+    evaluations = (
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([2, 0, 0, 1], ['A', 'B', 'C', 'D']),
         ([0, 1, 2, 0], ['A', 'B', 'C', 'D']),
         ([0, 1, 0, 2], ['A', 'B', 'C', 'D']),
     )
-    dok, _, _, _ = bws.extract_pairs_batch2(data)
-    ranked, ordids, scores, (eigval, eigenvec) = bws.rank(
-        dok, method='pvalue', avg='exist', calibration='platt')
+    agg_dok, _, _, _, _ = bws.count(evaluations)
+    positions, sortedids, metrics, scores, info = bws.rank(
+        agg_dok, method='pvalue', avg='exist')
     """
     # compute p-values for Nij>Nji or 1
     n, _ = cnt.shape

             P[j, i] = 1 - pval

     # sum rows in DoK matrix
-    metric = np.array(P.sum(axis=1).flatten())[0]
+    metrics = np.array(P.sum(axis=1).flatten())[0]

     # averaging
     if avg == 'all':
-        metric /= len(metric)
+        metrics /= len(metrics)
     elif avg == 'exist':
         ridx, _ = (P + P.T).nonzero()  # ensure actual 0s are counted
         for i, c in zip(*np.unique(ridx, return_counts=True)):
-            metric[i] /= c
+            metrics[i] /= c

     # sort, larger row sums are better
-    ranked = np.argsort(-metric)  # minimize
-    ordids = np.array(indices)[ranked].tolist()
-    scores = metric[ranked]
+    positions = np.argsort(-metrics)  # minimize P, maximize 1-P
+    sortedids = np.array(indices)[positions]
+    metrics = metrics[positions]

-    # calibrate scores
-    if calibration in ('platt', 'isotonic'):
-        labels = scores > np.mean(scores)  # TRUE: s>mean(s)
-        scores = calibrate(scores, labels, method=calibration)
-    elif calibration == 'minmax':
-        scores = minmax(scores)
+    # further information
+    info = {}
+    info["P"] = P

     # done
-    return ranked.tolist(), ordids, scores.tolist(), P
+    return positions, sortedids, metrics, info


-def scoring_orme(cnt: scipy.sparse.csr_matrix,
-                 indices: List[str],
-                 calibration: Optional[str] = 'platt'):
-    """Scoring based on Orme (2009)
-
-    Parameters:
-    -----------
-    cnt : scipy.sparse.dok.dok_matrix
-        Quadratic sparse matrix with frequency data
-
-    indices : List[str]
-        Identifiers, e.g. UUID4, of each row/column of the `cnt` matrix.
-
-    calibration: str (Default: None)
-        The calibrated scores. For 'platt' and 'isotonic' we assume
-        `label[i]=rowsum[i]>mean(rowsum)`.
-
-    Returns:
-    --------
-    ranked : List[int]
-        The array positions to order/sort the original data by indexing.
-
-    ordids : List[int]
-        The item IDs in the new order.
-
-    scores : List[float]
-        The scores for each item ID. Also sorted in descending order.
- - info - the ratios from [-1, +1] - - Example: - -------- - import bwsample as bws - data = ( - ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), - ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), - ([2, 0, 0, 1], ['A', 'B', 'C', 'D']), - ([0, 1, 2, 0], ['A', 'B', 'C', 'D']), - ([0, 1, 0, 2], ['A', 'B', 'C', 'D']), - ) - _, dok_direct, _, _ = bws.extract_pairs_batch2(data) - ranked, ordids, scores, ratios = bws.rank( - dok_direct, method='omre', calibration='platt') - References: - ----------- - Orme, B., 2009. MaxDiff Analysis: Simple Counting, Individual-Level - Logit, and HB. https://api.semanticscholar.org/CorpusID:202605777 - """ - # compute ratios - cnt = cnt.tocsr() - metric = (cnt - cnt.T).sum(axis=1) / (cnt + cnt.T).sum(axis=1) - metric = np.array(metric.flatten())[0] - - # sort, larger row sums are better - ranked = np.argsort(-metric) # maximize - ordids = np.array(indices)[ranked].tolist() - scores = metric[ranked] - - # calibrate scores - if calibration in ('platt', 'isotonic'): - labels = scores > 0 # TRUE: s>0 - scores = calibrate(scores, labels, method=calibration) - elif calibration == 'minmax': - scores = minmax(scores) - - # done - return ranked.tolist(), ordids, scores.tolist(), metric - - -def scoring_eigenvector(cnt: scipy.sparse.csr_matrix, - indices: List[str], - calibration: Optional[str] = None): +def eigenvector_estimation(cnt: scipy.sparse.csr_matrix, + indices: List[str]): """Compute the eigenvectors of the pairwise comparison matrix, and calibrate eigenvectors as scores. @@ -389,38 +266,35 @@ def scoring_eigenvector(cnt: scipy.sparse.csr_matrix, indices : List[str] Identifiers, e.g. UUID4, of each row/column of the `cnt` matrix. - calibration: str (Default: None) - The calibrated scores. For 'platt' and 'isotonic' we assume - `label[i]=eigenvector[i]>0.5`. There is also the option to run - Min-Max-Scaling (`'minmax'`) but won't recommend using it. - Returns: -------- - ranked : List[int] + positions : np.array[uint64] The array positions to order/sort the original data by indexing. - ordids : List[int] - The item IDs in the new order. + sortedids : np.array[any] + The reordered item IDs - scores : List[float] - The scores for each item ID. Also sorted in descending order. + metrics : np.array[float] + The metric for each item ID. Also sorted in descending order. - info - (eigval, eigenvec) + info : dict + Further information depending on the selected `method`, e.g. + - "eigval": Estimated eigenvalue + - "eigenvec": Estimated eigenvector Example: -------- import bwsample as bws - data = ( + evaluations = ( ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), ([2, 0, 0, 1], ['A', 'B', 'C', 'D']), ([0, 1, 2, 0], ['A', 'B', 'C', 'D']), ([0, 1, 0, 2], ['A', 'B', 'C', 'D']), ) - dok, _, _, _ = bws.extract_pairs_batch2(data) - ranked, ordids, scores, (eigval, eigenvec) = bws.rank( - dok, method='eigen', calibration=None) + agg_dok, _, _, _, _ = bws.count(evaluations) + positions, sortedids, metrics, info = bws.rank( + agg_dok, method='eigen') References: ----------- @@ -434,7 +308,9 @@ def scoring_eigenvector(cnt: scipy.sparse.csr_matrix, for i in range(n): cnt[i, i] = 1 - # compute "positive reciprocal near consistent pairwise comparison matrix" + # Compute a sparse "positive reciprocal near consistent pairwise + # comparison matrix". Avoid accidental conversion into dense matrix + # by manipulating the value/data vector of the transposed sp matrix. 
cnt = cnt.tocsr() cntT = cnt.T cntT.data = 1.0 / cntT.data @@ -442,28 +318,25 @@ def scoring_eigenvector(cnt: scipy.sparse.csr_matrix, # compute eigenvectors as scores eigval, eigenvec = scipy.sparse.linalg.eigs(ratios, k=1) - metric = np.abs(np.real(eigenvec[:, 0])) + metrics = np.abs(np.real(eigenvec[:, 0])) # sort, larger row sums are better - ranked = np.argsort(-metric) # maximize - ordids = np.array(indices)[ranked].tolist() - scores = metric[ranked] + positions = np.argsort(-metrics) # maximize + sortedids = np.array(indices)[positions] + metrics = metrics[positions] - # calibrate scores - if calibration in ('platt', 'isotonic'): - labels = scores > .5 # TRUE: s>.5 - scores = calibrate(scores, labels, method=calibration) - elif calibration == 'minmax': - scores = minmax(scores) + # informations + info = {} + info["eigval"] = eigval + info["eigenvec"] = eigenvec # done - return ranked.tolist(), ordids, scores.tolist(), (eigval, eigenvec) + return positions, sortedids, metrics, info def transition_simulation(cnt: scipy.sparse.dok.dok_matrix, indices: List[str], - n_rounds: Optional[int] = 3, - calibration: Optional[str] = 'minmax'): + n_rounds: Optional[int] = 2): """Estimate transition matrix of item_i>item_j, simulate the item probabilities that are calibrated to scores. @@ -475,39 +348,38 @@ def transition_simulation(cnt: scipy.sparse.dok.dok_matrix, indices : List[str] Identifiers, e.g. UUID4, of each row/column of the `cnt` matrix. - calibration: Optional[str] (Default: 'platt') - The calibrated scores. We are predicting transition probabilities - here, i.e. `SUM[transprob]=1`. Thus, recommend using Min-Max-Scaling - (`'minmax'`). + n_rounds: Optional[int] = 2 + Number of steps/rounds to simulate Returns: -------- - ranked : List[int] + positions : np.array[uint64] The array positions to order/sort the original data by indexing. - ordids : List[int] - The item IDs in the new order. + sortedids : np.array[any] + The reordered item IDs - scores : List[float] - The scores for each item ID. Also sorted in descending order. + metrics : np.array[float] + The metric for each item ID. Also sorted in descending order. - info : Tuple - (x, transmat) `x` is is the predicted/simulated item probability, - and `transmat` the estimated transition probability matrix. + info : dict + Further information depending on the selected `method`, e.g. + - "sim": The predicted/simulated item probability + - "transmat: The estimated transition probability matrix. 
Example: -------- import bwsample as bws - data = ( + evaluations = ( ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), ([2, 0, 0, 1], ['A', 'B', 'C', 'D']), ([0, 1, 2, 0], ['A', 'B', 'C', 'D']), ([0, 1, 0, 2], ['A', 'B', 'C', 'D']), ) - dok, _, _, _ = bws.extract_pairs_batch2(data) - ranked, ordids, scores, (x, transmat) = bws.rank( - dok, method='transition', n_rounds=3, calibration='minmax') + agg_dok, _, _, _, _ = bws.count(evaluations) + positions, sortedids, metrics, info = bws.rank( + agg_dok, method='trans', n_rounds=3) """ n = cnt.shape[0] @@ -528,16 +400,17 @@ def transition_simulation(cnt: scipy.sparse.dok.dok_matrix, x = x * transmat # sort, larger state probabilities are better - ranked = np.argsort(-x) # maximize - ordids = np.array(indices)[ranked].tolist() - scores = x[ranked] + positions = np.argsort(-x) # maximize + sortedids = np.array(indices)[positions] + metrics = x[positions] - # calibrate scores - if calibration == 'minmax': - scores = minmax(scores) + # informations + info = {} + info["sim"] = x + info["transmat"] = transmat # done - return ranked.tolist(), ordids, scores.tolist(), (x, transmat) + return positions, sortedids, metrics, info def mle_btl_sparse(cnt: scipy.sparse.csr_matrix, @@ -572,9 +445,8 @@ def mle_btl_sparse(cnt: scipy.sparse.csr_matrix, References: ----------- - Hunter, D.R., 2004. MM algorithms for generalized Bradley-Terry models. - The Annals of Statistics 32, 384–406. - https://doi.org/10.1214/aos/1079120141 + Hunter, D.R., 2004. MM algorithms for generalized Bradley-Terry models. The + Annals of Statistics 32, 384–406. https://doi.org/10.1214/aos/1079120141 """ # ensure CSR format cnt = cnt.tocsr() @@ -626,12 +498,18 @@ def mle_btl_sparse(cnt: scipy.sparse.csr_matrix, return np.array(x1.flatten())[0], False -def ranking_btl(cnt: scipy.sparse.csr_matrix, - indices: List[str], - calibration: Optional[str] = 'minmax', - prefit: Optional[bool] = True, - max_iter: Optional[int] = 50, - tol: Optional[float] = 1e-5): +def minmax(arr: np.array) -> np.array: + data = np.array(arr) + xmin = data.min() + xmax = data.max() + return (data - xmin) / (xmax - xmin) + + +def bradley_terry_probability(cnt: scipy.sparse.csr_matrix, + indices: List[str], + prefit: Optional[bool] = True, + max_iter: Optional[int] = 50, + tol: Optional[float] = 1e-5): """Bradley-Terry-Luce (BTL) probability model for pairwise comparisons Parameters: @@ -642,9 +520,6 @@ def ranking_btl(cnt: scipy.sparse.csr_matrix, indices : List[str] Identifiers, e.g. UUID4, of each row/column of the `cnt` matrix. - calibration: str (Default: None) - The calibrated scores. We recommend using Min-Max-Scaling (`'minmax'`) - prefit : bool flag to prefit parameters with 'ratio' method (see `ranking_maximize_ratios`) @@ -657,31 +532,32 @@ def ranking_btl(cnt: scipy.sparse.csr_matrix, Returns: -------- - ranked : List[int] + positions : np.array[uint64] The array positions to order/sort the original data by indexing. - ordids : List[int] - The item IDs in the new order. + sortedids : np.array[any] + The reordered item IDs - scores : List[float] - The scores for each item ID. Also sorted in descending order. + metrics : np.array[float] + The metric for each item ID. Also sorted in descending order. - info - x are the estimated MLE parameters that can be used for scoring + info : dict + Further information depending on the selected `method`, e.g. 
+ - "weights": The estimated MLE parameters that can be used for scoring Example: -------- import bwsample as bws - data = ( + evaluations = ( ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), ([2, 0, 0, 1], ['A', 'B', 'C', 'D']), ([0, 1, 2, 0], ['A', 'B', 'C', 'D']), ([0, 1, 0, 2], ['A', 'B', 'C', 'D']), ) - dok, _, _, _ = bws.extract_pairs_batch2(data) - ranked, ordids, scores, x = bws.rank( - dok, method='btl', calibration='minmax') + agg_dok, _, _, _, _ = bws.count(evaluations) + positions, sortedids, metrics, info = bws.rank( + agg_dok, method='btl', prefit=True, max_iter=100, tol=1e-5) """ cnt = cnt.tocsr() x0 = None @@ -696,13 +572,13 @@ def ranking_btl(cnt: scipy.sparse.csr_matrix, x, flag = mle_btl_sparse(cnt, x0=x0, max_iter=max_iter, tol=tol) # sort, larger state probabilities are better - ranked = np.argsort(-x) # maximize - ordids = np.array(indices)[ranked].tolist() - scores = x[ranked] + positions = np.argsort(-x) # maximize + sortedids = np.array(indices)[positions] + metrics = x[positions] - # calibrate scores (only minmax!) - if calibration == 'minmax': - scores = minmax(scores) + # informations + info = {} + info["weights"] = x # done - return ranked.tolist(), ordids, scores.tolist(), x + return positions, sortedids, metrics, info diff --git a/bwsample/utils.py b/bwsample/utils.py index 5d62324..a198a9c 100644 --- a/bwsample/utils.py +++ b/bwsample/utils.py @@ -1,8 +1,11 @@ -import itertools -import scipy.sparse -import numpy as np -from typing import Dict, Tuple, List -ItemID = str +import itertools # to_scipy +import scipy.sparse # to_scipy +import numpy as np # to_scipy, calibrate +import sklearn.linear_model # adjustscore +import sklearn.preprocessing # adjustscore +import scipy.special # adjustscore +from typing import Dict, Tuple, List, Optional +ItemID = str # add_dok def to_scipy(dok: Dict[Tuple[str, str], int], dtype=np.float64) -> ( @@ -79,3 +82,60 @@ def add_dok(a: Dict[Tuple[ItemID, ItemID], int], out[key] = val + out.get(key, 0) # done return out + + +def adjustscore(scores: np.array, + method: Optional[str] = 'quantile', + n_quantiles: Optional[int] = 10000, + labels: Optional[np.array] = None) -> np.array: + """Wrapper function to adjust scores + + Parameters: + ----------- + scores: np.array + The scores generated by a model. + + method: str (Default: None) + The calibration algorithm: + - 'quantile' -- sklearn's quantile transform + - 'sig3iqr' -- sigmoid 3x sklearn's robust scaler with (25%,75%) + - 'platt' -- calibrate scores with the binary labels (Platt, 1999) + + n_quantiles: Optional[int] = 10000 + Parameter for `method='quantile'` + + labels: Optional[np.array] + For `method='platt'`. The binary labels that are supposed to be + classified by the scores. + + Return: + ------- + adjusted : np.array + The adjusted scores + + References: + ----------- + Platt, J., 1999. Probabilistic outputs for support vector machines and + comparisons to regularized likelihood methods. 
+    """
+    scores = np.array(scores)
+    labels = np.array(labels)
+
+    if method == 'quantile':
+        return sklearn.preprocessing.quantile_transform(
+            X=scores.reshape(-1, 1),
+            n_quantiles=min(n_quantiles, len(scores)),
+            output_distribution='uniform')
+
+    elif method == 'sig3iqr':
+        adjusted = sklearn.preprocessing.robust_scale(
+            X=scores, quantile_range=(25, 75))
+        return scipy.special.expit(3 * adjusted)
+
+    elif method == 'platt':
+        cls = sklearn.linear_model.LogisticRegression()
+        cls.fit(X=scores.reshape(-1, 1), y=labels)
+        return cls.predict_proba(scores.reshape(-1, 1))[:, 1]
+
+    else:
+        raise Exception(f"The method='{method}' is not implemented.")

diff --git a/test/test_adjustscore.py b/test/test_adjustscore.py
new file mode 100644
index 0000000..4b7a187
--- /dev/null
+++ b/test/test_adjustscore.py
@@ -0,0 +1,41 @@
+import bwsample as bws
+import numpy as np
+import random
+
+
+def test1():
+    scores = [random.random() for _ in range(1000)]
+    adjusted = bws.adjustscore(scores, method='quantile')
+    assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
+
+
+def test2():
+    scores = [.1, .3, .5, .7]
+    adjusted = bws.adjustscore(scores, method='quantile')
+    assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
+
+
+def test3():
+    scores = [random.random() for _ in range(1000)]
+    adjusted = bws.adjustscore(scores, method='sig3iqr')
+    assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
+
+
+def test4():
+    scores = [.1, .3, .5, .7]
+    adjusted = bws.adjustscore(scores, method='sig3iqr')
+    assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
+
+
+def test5():
+    scores = [random.random() for _ in range(1000)]
+    labels = [s > 0.5 for s in scores]
+    adjusted = bws.adjustscore(scores, method='platt', labels=labels)
+    assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
+
+
+def test6():
+    scores = [.1, .3, .5, .7]
+    labels = [s > 0.5 for s in scores]
+    adjusted = bws.adjustscore(scores, method='platt', labels=labels)
+    assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()

diff --git a/test/test_rank.py b/test/test_rank.py
index b36c7d3..68c14bb 100644
--- a/test/test_rank.py
+++ b/test/test_rank.py
@@ -3,46 +3,44 @@

 def test1():
     # demo data
-    data = (
+    evaluations = (
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),
         ([2, 0, 0, 1], ['A', 'B', 'C', 'D']),
         ([0, 1, 2, 0], ['A', 'B', 'C', 'D']),
         ([0, 1, 0, 2], ['A', 'B', 'C', 'D']),
     )
-    dok, _ = bws.counting.direct_extract_batch(data)
+    dok, _, _, _, _ = bws.count(evaluations)

     # possible settings
     settings = [
-        {"method": "ratio", "avg": "all", "calibration": "platt"},
-        {"method": "ratio", "avg": "exist", "calibration": "platt"},
-        {"method": "ratio", "avg": "all", "calibration": "isotonic"},
-        {"method": "ratio", "avg": "exist", "calibration": "isotonic"},
-        {"method": "ratio", "avg": "all", "calibration": "minmax"},
-        {"method": "ratio", "avg": "exist", "calibration": "minmax"},
-        {"method": "pvalue", "avg": "all", "calibration": "platt"},
-        {"method": "pvalue", "avg": "exist", "calibration": "platt"},
-        {"method": "pvalue", "avg": "all", "calibration": "isotonic"},
-        {"method": "pvalue", "avg": "exist", "calibration": "isotonic"},
-        {"method": "pvalue", "avg": "all", "calibration": "minmax"},
-        {"method": "pvalue", "avg": "exist", "calibration": "minmax"},
-        {"method": "btl", "calibration": "platt"},
-        {"method": "btl", "calibration": "isotonic"},
-        {"method": "btl", "calibration": "minmax"},
-        {"method": "orme",
"calibration": "platt"}, - {"method": "orme", "calibration": "isotonic"}, - {"method": "orme", "calibration": "minmax"}, - {"method": "eigen", "calibration": "platt"}, - {"method": "eigen", "calibration": "isotonic"}, - {"method": "eigen", "calibration": "minmax"}, - {"method": "transition", "calibration": "platt"}, - {"method": "transition", "calibration": "isotonic"}, - {"method": "transition", "calibration": "minmax"} + {"method": "ratio", "avg": "all", "adjust": "platt"}, + {"method": "ratio", "avg": "exist", "adjust": "platt"}, + {"method": "ratio", "avg": "all", "adjust": "quantile"}, + {"method": "ratio", "avg": "exist", "adjust": "quantile"}, + {"method": "ratio", "avg": "all", "adjust": "sig3iqr"}, + {"method": "ratio", "avg": "exist", "adjust": "sig3iqr"}, + {"method": "pvalue", "avg": "all", "adjust": "platt"}, + {"method": "pvalue", "avg": "exist", "adjust": "platt"}, + {"method": "pvalue", "avg": "all", "adjust": "quantile"}, + {"method": "pvalue", "avg": "exist", "adjust": "quantile"}, + {"method": "pvalue", "avg": "all", "adjust": "sig3iqr"}, + {"method": "pvalue", "avg": "exist", "adjust": "sig3iqr"}, + {"method": "btl", "adjust": "platt"}, + {"method": "btl", "adjust": "quantile"}, + {"method": "btl", "adjust": "sig3iqr"}, + {"method": "eigen", "adjust": "platt"}, + {"method": "eigen", "adjust": "quantile"}, + {"method": "eigen", "adjust": "sig3iqr"}, + {"method": "trans", "adjust": "platt"}, + {"method": "trans", "adjust": "quantile"}, + {"method": "trans", "adjust": "sig3iqr"} ] # loop over each setting for setting in settings: - ranked, ordids, scores, _ = bws.rank(dok, **setting) - assert len(ranked) == 4 - assert len(ordids) == 4 + positions, sortedids, metrics, scores, info = bws.rank(dok, **setting) + assert len(positions) == 4 + assert len(sortedids) == 4 + assert len(metrics) == 4 assert len(scores) == 4 From e861fb0bcdd6b1132f5e0468e867b2e5b9bf4f86 Mon Sep 17 00:00:00 2001 From: UH <554c46@gmail.com> Date: Sat, 13 Mar 2021 18:33:03 +0100 Subject: [PATCH 4/7] example notebook added --- docs/rank.ipynb | 325 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 docs/rank.ipynb diff --git a/docs/rank.ipynb b/docs/rank.ipynb new file mode 100644 index 0000000..71d4a73 --- /dev/null +++ b/docs/rank.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "educational-surge", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('..')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "adequate-flash", + "metadata": {}, + "outputs": [], + "source": [ + "import bwsample as bws\n", + "import numpy as np\n", + "\n", + "#import matplotlib.pyplot as plt\n", + "#%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "simplified-finding", + "metadata": {}, + "source": [ + "# Prepare toy data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "otherwise-asset", + "metadata": {}, + "outputs": [], + "source": [ + "evaluations = (\n", + " ([1, 0, 0, 2], ['A', 'B', 'C', 'D']),\n", + " ([1, 0, 0, 2], ['A', 'B', 'C', 'D']), \n", + " ([2, 0, 0, 1], ['A', 'B', 'C', 'D']), \n", + " ([1, 2, 0, 0], ['D', 'E', 'F', 'A']),\n", + " ([0, 2, 1, 0], ['D', 'E', 'F', 'A']),\n", + " ([0, 0, 1, 2], ['D', 'E', 'F', 'A'])\n", + ")\n", + "\n", + "dok, _, _, _, _ = bws.count(evaluations)" + ] + }, + { + "cell_type": "markdown", + "id": "northern-copyright", + "metadata": {}, + "source": [ + "# Simple Ratios" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "id": "demographic-nashville", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Positions: [5 3 1 2 0 4]\n", + "Ordered IDs: ['F' 'D' 'B' 'C' 'A' 'E']\n", + " Scores: [1. 0.8 0.5 0.5 0.2 0. ]\n" + ] + } + ], + "source": [ + "positions, sortedids, metrics, scores, info = bws.rank(\n", + " dok, method='ratio', avg='exist', adjust='quantile')\n", + "\n", + "#print(np.max(scores), np.min(scores))\n", + "#plt.hist(scores);\n", + "\n", + "print(f\" Positions: {positions}\") \n", + "print(f\"Ordered IDs: {sortedids}\") \n", + "print(f\" Scores: {scores}\") " + ] + }, + { + "cell_type": "markdown", + "id": "metric-blast", + "metadata": {}, + "source": [ + "# 1-Minus p-Values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "spatial-lexington", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Positions: [5 3 1 2 0 4]\n", + "Ordered IDs: ['F' 'D' 'B' 'C' 'A' 'E']\n", + " Scores: [1. 0.8 0.5 0.5 0.2 0. ]\n" + ] + } + ], + "source": [ + "positions, sortedids, metrics, scores, info = bws.rank(\n", + " dok, method='pvalue', avg='exist', adjust='quantile')\n", + "\n", + "print(f\" Positions: {positions}\") \n", + "print(f\"Ordered IDs: {sortedids}\") \n", + "print(f\" Scores: {scores}\") " + ] + }, + { + "cell_type": "markdown", + "id": "universal-recall", + "metadata": {}, + "source": [ + "# Bradley-Terry-Luce (BTL) model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "continuing-underwear", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Positions: [4 5 3 0 1 2]\n", + "Ordered IDs: ['E' 'F' 'D' 'A' 'B' 'C']\n", + " Scores: [1. 0.8 0.6 0.4 0. 0. ]\n" + ] + } + ], + "source": [ + "positions, sortedids, metrics, scores, info = bws.rank(\n", + " dok, method='btl', adjust='quantile')\n", + "\n", + "print(f\" Positions: {positions}\") \n", + "print(f\"Ordered IDs: {sortedids}\") \n", + "print(f\" Scores: {scores}\") " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "limiting-mouth", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimated MLE parameters: [0.10324378 0.07756639 0.07756639 0.13433648 0.36627794 0.24100903] \n", + "\n" + ] + } + ], + "source": [ + "print(f\"Estimated MLE parameters: {info['weights']}\", \"\\n\") " + ] + }, + { + "cell_type": "markdown", + "id": "undefined-tattoo", + "metadata": {}, + "source": [ + "# Eigenvector" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "described-examination", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Positions: [5 3 1 2 0 4]\n", + "Ordered IDs: ['F' 'D' 'B' 'C' 'A' 'E']\n", + " Scores: [1. 0.8 0.575 0.4 0.2 0. 
]\n" + ] + } + ], + "source": [ + "positions, sortedids, metrics, scores, info = bws.rank(\n", + " dok, method='eigen', adjust='quantile')\n", + "\n", + "print(f\" Positions: {positions}\") \n", + "print(f\"Ordered IDs: {sortedids}\") \n", + "print(f\" Scores: {scores}\") " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "artistic-worse", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimated eigenvector: [0.1991372 0.21750303 0.21750303 0.25644901 0.03970756 0.89352474]\n" + ] + } + ], + "source": [ + "print(f\"Estimated eigenvector: {np.abs(np.real(info['eigenvec'].reshape(-1)))}\")" + ] + }, + { + "cell_type": "markdown", + "id": "typical-shanghai", + "metadata": {}, + "source": [ + "# Transition Simulation" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "demanding-origin", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Positions: [5 3 0 2 1 4]\n", + "Ordered IDs: ['F' 'D' 'A' 'C' 'B' 'E']\n", + " Scores: [1. 0.8 0.6 0.4 0.2 0. ]\n" + ] + } + ], + "source": [ + "positions, sortedids, metrics, scores, info = bws.rank(\n", + " dok, method='trans', adjust='quantile')\n", + "\n", + "print(f\" Positions: {positions}\") \n", + "print(f\"Ordered IDs: {sortedids}\") \n", + "print(f\" Scores: {scores}\") " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "handled-theorem", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Simulated state probabilities: [0.11996576 0.10923823 0.10923823 0.12359208 0.04837442 0.13556368]\n" + ] + } + ], + "source": [ + "print(f\"Simulated state probabilities: {info['sim']}\") " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "imposed-identity", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimated Transition Matrix:\n", + "[[0.399 0.079 0.079 0.088 0.012 0.111]\n", + " [0.078 0.398 0.084 0.119 0.036 0.117]\n", + " [0.078 0.084 0.398 0.119 0.036 0.117]\n", + " [0.115 0.055 0.055 0.395 0.026 0.082]\n", + " [0.085 0.087 0.087 0.125 0.378 0.126]\n", + " [0.107 0.107 0.107 0.057 0.01 0.407]]\n" + ] + } + ], + "source": [ + "print(\"Estimated Transition Matrix:\")\n", + "print(info['transmat'].todense().round(3))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 9c39ee69f2e3fa7de20cf557b0f61a83ad8780c6 Mon Sep 17 00:00:00 2001 From: UH <554c46@gmail.com> Date: Sat, 13 Mar 2021 18:33:16 +0100 Subject: [PATCH 5/7] formatting issues fixed --- bwsample/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwsample/utils.py b/bwsample/utils.py index a198a9c..2077e94 100644 --- a/bwsample/utils.py +++ b/bwsample/utils.py @@ -125,17 +125,17 @@ def adjustscore(scores: np.array, return sklearn.preprocessing.quantile_transform( X=scores.reshape(-1, 1), n_quantiles=min(n_quantiles, len(scores)), - output_distribution='uniform') + output_distribution='uniform').reshape(-1) elif method == 'sig3iqr': adjusted = sklearn.preprocessing.robust_scale( X=scores, quantile_range=(25, 75)) - return 
scipy.special.expit(3 * adjusted)
+        return scipy.special.expit(3 * adjusted).reshape(-1)

     elif method == 'platt':
         cls = sklearn.linear_model.LogisticRegression()
         cls.fit(X=scores.reshape(-1, 1), y=labels)
-        return cls.predict_proba(scores.reshape(-1, 1))[:, 1]
+        return cls.predict_proba(scores.reshape(-1, 1))[:, 1].reshape(-1)

     else:
         raise Exception(f"The method='{method}' is not implemented.")

From 13a5fe47239a1f5d378a794635f5ace6bc708065 Mon Sep 17 00:00:00 2001
From: UH <554c46@gmail.com>
Date: Sat, 13 Mar 2021 18:33:31 +0100
Subject: [PATCH 6/7] matplotlib added

---
 requirements-demo.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements-demo.txt b/requirements-demo.txt
index 8b63c34..90fe6a5 100644
--- a/requirements-demo.txt
+++ b/requirements-demo.txt
@@ -1,3 +1,4 @@
 # packages required to run example notebooks
 jupyterlab>=3.0.5
 pandas>=1.1.5
+matplotlib>=3.0.0

From 1ea31ed0895fd8d34e86a132439550787dc324bf Mon Sep 17 00:00:00 2001
From: UH <554c46@gmail.com>
Date: Sat, 13 Mar 2021 18:38:23 +0100
Subject: [PATCH 7/7] readme updated for ranking

---
 README.md | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d1d9557..d7a6125 100644
--- a/README.md
+++ b/README.md
@@ -143,9 +143,21 @@ The function `bwsample.rank` returns an index array with the proposed ordering

 ```python
 import bwsample as bws
-ranked, ordids, scores, info = bws.rank(dok, method='ratio')
+ranked, ordids, metrics, scores, info = bws.rank(dok, method='ratio', adjust='quantile')
 ```

+**Available methods:**
+Computed from extracted pairs:
+
+- `'ratio'` -- Simple ratios for each pair, and the sum of ratios for each item.
+- `'pvalue'` -- Chi-Squared based p-value for each pair, and the sum of `1-pval` for each item.
+- `'btl'` -- Bradley-Terry-Luce (BTL) model estimated with the MM algorithm (Hunter, 2004).
+- `'eigen'` -- Eigenvectors of the reciprocal pairwise comparison matrix (Saaty, 2003).
+- `'trans'` -- Estimates the transition probability that the next item is better.
+
+The implementations `'ratio'`, `'pvalue'`, `'btl'`, `'eigen'`, and `'trans'` are fully based on sparse matrix operations and `scipy.sparse` algorithms, and avoid accidental conversions to dense matrices.
+
 **References:**

 - Eigenvector solution in: Saaty, T. L. (2003). Decision-making with the AHP: Why is the principal eigenvector necessary. European Journal of Operational Research, 145(1), 85–91. [https://doi.org/10.1016/S0377-2217(02)00227-8](https://doi.org/10.1016/S0377-2217(02)00227-8)