Commit: add torch dtw
cjekel committed Nov 18, 2023
1 parent 3ceb3a1 commit 96fb200
Showing 3 changed files with 230 additions and 0 deletions.
80 changes: 80 additions & 0 deletions benchmark.py
@@ -0,0 +1,80 @@
# https://github.com/nucccc/benchmark_similarity_measures/blob/main/run_benchmark.py
import similaritymeasures
import torch
import json
import timeit
import numpy as np

benchmark_result: dict[str, dict] = dict()

x1 = np.linspace(0.0, 1.0, 500)
y1 = np.ones(500)*2
x2 = np.linspace(0.0, 1.0, 250)
y2 = np.ones(250)

np.random.seed(1212121)
curve_a_rand = np.random.random((100, 2))
curve_b_rand = np.random.random((90, 2))

curve1 = np.array((x1, y1)).T
curve2 = np.array((x2, y2)).T

curve1t = torch.from_numpy(curve1)
curve2t = torch.from_numpy(curve2)

r1 = 10
r2 = 100
theta = np.linspace(0.0, 2.0*np.pi, 500)
x1 = np.cos(theta)*r1
x2 = np.cos(theta)*r2
y1 = np.sin(theta)*r1
y2 = np.sin(theta)*r2
curve5 = np.array((x1, y1)).T
curve6 = np.array((x2, y2)).T
curve5t = torch.from_numpy(curve5)
curve6t = torch.from_numpy(curve6)


def run_dtw_c1_c2():
    return similaritymeasures.dtw(curve1, curve2)

def run_dtw_c5_c6():
    return similaritymeasures.dtw(curve5, curve6)

def run_dtw_c1_c2t():
    return similaritymeasures.dtwtorch(curve1t, curve2t)

def run_dtw_c5_c6t():
    return similaritymeasures.dtwtorch(curve5t, curve6t)

bnchmks = {
    'dtw_c1_c2': run_dtw_c1_c2,
    'dtw_c5_c6': run_dtw_c5_c6,
    'dtw_c1_c2t': run_dtw_c1_c2t,
    'dtw_c5_c6t': run_dtw_c5_c6t,
}

n_repeats = 50
n_runs = 20

for name, func in bnchmks.items():

    times_list = timeit.repeat(func, repeat=n_repeats, number=n_runs)

    total = sum(times_list)
    avg = total / len(times_list)

    func_result = {
        'times_list': times_list,
        'total': total,
        'avg': avg
    }

    benchmark_result[name] = func_result

result_encoded = json.dumps(benchmark_result)
print(result_encoded)
with open('benchmark.json', 'w') as f:
    # dump the dict itself; dumping result_encoded would double-encode the JSON
    json.dump(benchmark_result, f)
# with open('benchmark_no_jit.json', 'w') as f:
#     json.dump(benchmark_result, f)
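Not part of the commit, but a minimal sketch of how the resulting benchmark.json could be summarised afterwards, assuming the results dict is dumped directly as above:

import json

with open('benchmark.json') as f:
    results = json.load(f)

for name, res in results.items():
    # each entry holds the raw timeit list plus the precomputed total/avg
    print(f"{name}: avg {res['avg']:.4f} s per timeit repeat")

Each timeit repeat covers 20 calls (n_runs above), so dividing avg by 20 gives a rough per-call time.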
135 changes: 135 additions & 0 deletions similaritymeasures/similaritymeasures.py
@@ -1,6 +1,7 @@
from __future__ import division
import numpy as np
from scipy.spatial import distance
import torch
# MIT License
#
# Copyright (c) 2018,2019 Charles Jekel
@@ -797,6 +798,140 @@ def dtw(exp_data, num_data, metric='euclidean', **kwargs):
    return d[-1, -1], d


def dtwtorch(exp_data, num_data):
r"""
Compute the Dynamic Time Warping distance.
This computes a generic Dynamic Time Warping (DTW) distance and follows
the algorithm from [1]_. This can use all distance metrics that are
available in scipy.spatial.distance.cdist.
Parameters
----------
exp_data : array_like
Curve from your experimental data. exp_data is of (M, N) shape, where
M is the number of data points, and N is the number of dimmensions
num_data : array_like
Curve from your numerical data. num_data is of (P, N) shape, where P
is the number of data points, and N is the number of dimmensions
metric : str or callable, optional
The distance metric to use. Default='euclidean'. Refer to the
documentation for scipy.spatial.distance.cdist. Some examples:
'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'wminkowski', 'yule'.
**kwargs : dict, optional
Extra arguments to `metric`: refer to each metric documentation in
scipy.spatial.distance.
Some examples:
p : scalar
The p-norm to apply for Minkowski, weighted and unweighted.
Default: 2.
w : ndarray
The weight vector for metrics that support weights (e.g.,
Minkowski).
V : ndarray
The variance vector for standardized Euclidean.
Default: var(vstack([XA, XB]), axis=0, ddof=1)
VI : ndarray
The inverse of the covariance matrix for Mahalanobis.
Default: inv(cov(vstack([XA, XB].T))).T
out : ndarray
The output array
If not None, the distance matrix Y is stored in this array.
Returns
-------
r : float
DTW distance.
d : ndarray (2-D)
Cumulative distance matrix
Notes
-----
The DTW distance is d[-1, -1].
This has O(M, P) computational cost.
The latest scipy.spatial.distance.cdist information can be found at
https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html
Your x locations of data points should be exp_data[:, 0], and the y
locations of the data points should be exp_data[:, 1]. Same for num_data.
This uses the euclidean distance for now. In the future it should be
possible to support other metrics.
DTW is a non-metric distance, which means DTW doesn't hold the triangle
inequality.
https://en.wikipedia.org/wiki/Triangle_inequality
References
----------
.. [1] Senin, P., 2008. Dynamic time warping algorithm review. Information
and Computer Science Department University of Hawaii at Manoa Honolulu,
USA, 855, pp.1-23.
http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf
Examples
--------
>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r, d = dtw(exp_data, num_data)
The euclidean distance is used by default. You can use metric and **kwargs
to specify different types of distance metrics. The following example uses
the city block or Manhattan distance between points.
>>> r, d = dtw(exp_data, num_data, metric='cityblock')
"""
    c = torch.cdist(exp_data, num_data)

    # match the dtype/device of the cost matrix (torch.zeros alone would
    # default to float32 even for double-precision inputs)
    d = torch.zeros_like(c)
    d[0, 0] = c[0, 0]
    n, m = c.shape
    for i in range(1, n):
        d[i, 0] = d[i-1, 0] + c[i, 0]
    for j in range(1, m):
        d[0, j] = d[0, j-1] + c[0, j]
    for i in range(1, n):
        for j in range(1, m):
            # d[i, j] = c[i, j] + torch.stack(
            #     [d[i-1, j], d[i, j-1], d[i-1, j-1]]
            # ).min()
            d[i, j] = c[i, j] + min((d[i-1, j], d[i, j-1], d[i-1, j-1]))
    return d[-1, -1], d


# dtwtorch = torch.jit.trace(
# dtwtorch,
# (
# # torch.randn(3, 2), torch.randn(3, 2),
# # torch.randn(10, 2), torch.randn(100, 2),
# torch.randn(10, 20), torch.randn(15, 20),

# )
# )


def dtw_path(d):
r"""
Calculates the optimal DTW path from a given DTW cumulative distance
15 changes: 15 additions & 0 deletions tests/tests.py
@@ -1,4 +1,5 @@
import numpy as np
import torch
import unittest
import similaritymeasures
from scipy.spatial.distance import cdist
@@ -27,6 +28,8 @@
P = np.array([[0, 0], [1, 1], [2, 2]])
Q = P.copy()
Q[:, 1] = Q[:, 1] + 1
Pt = torch.from_numpy(P).double()
Qt = torch.from_numpy(Q).double()

r1 = 10
r2 = 100
@@ -37,6 +40,8 @@
y2 = np.sin(theta)*r2
curve5 = np.array((x1, y1)).T
curve6 = np.array((x2, y2)).T
curve5t = torch.from_numpy(curve5)
curve6t = torch.from_numpy(curve6)


class TestEverything(unittest.TestCase):
@@ -95,10 +100,20 @@ def test_P_Q_dtw(self):
        r, _ = similaritymeasures.dtw(P, Q)
        self.assertTrue(r, 3.0)

    def test_P_Q_dtw_torch(self):
        r, _ = similaritymeasures.dtwtorch(Pt, Qt)
        self.assertTrue(np.isclose(r, 3.0))

    def test_c5_c6_dtw(self):
        r, _ = similaritymeasures.dtw(curve5, curve6)
        self.assertTrue(np.isclose(r, 9000.0))

    def test_c5_c6_dtw_torch(self):
        r, _ = similaritymeasures.dtwtorch(curve5t, curve6t)
        self.assertTrue(np.isclose(r, 9000.0))

    def test_c5_c6_df(self):
        df = similaritymeasures.frechet_dist(curve5, curve6)
        self.assertTrue(np.isclose(df, 90.0))
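One further test that could be worth having (not in this commit; the test name is hypothetical) is a direct cross-check that the torch port agrees with the NumPy implementation on random curves, since both use the Euclidean point-to-point distance:

    def test_dtw_matches_dtwtorch_random(self):
        # hypothetical cross-check: both implementations should agree
        a = np.random.random((25, 2))
        b = np.random.random((30, 2))
        r_np, _ = similaritymeasures.dtw(a, b)
        r_t, _ = similaritymeasures.dtwtorch(torch.from_numpy(a),
                                             torch.from_numpy(b))
        self.assertTrue(np.isclose(r_np, float(r_t)))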
