Commit: add torch dtw
cjekel committed Nov 18, 2023
1 parent 3ceb3a1 commit 96fb200
Showing 3 changed files with 230 additions and 0 deletions.
80 changes: 80 additions & 0 deletions benchmark.py
@@ -0,0 +1,80 @@
# https://github.com/nucccc/benchmark_similarity_measures/blob/main/run_benchmark.py
import similaritymeasures
import torch
import json
import timeit
import numpy as np

benchmark_result: dict[str, dict] = dict()

x1 = np.linspace(0.0, 1.0, 500)
y1 = np.ones(500)*2
x2 = np.linspace(0.0, 1.0, 250)
y2 = np.ones(250)

np.random.seed(1212121)
curve_a_rand = np.random.random((100, 2))
curve_b_rand = np.random.random((90, 2))

curve1 = np.array((x1, y1)).T
curve2 = np.array((x2, y2)).T

curve1t = torch.from_numpy(curve1)
curve2t = torch.from_numpy(curve2)

r1 = 10
r2 = 100
theta = np.linspace(0.0, 2.0*np.pi, 500)
x1 = np.cos(theta)*r1
x2 = np.cos(theta)*r2
y1 = np.sin(theta)*r1
y2 = np.sin(theta)*r2
curve5 = np.array((x1, y1)).T
curve6 = np.array((x2, y2)).T
curve5t = torch.from_numpy(curve5)
curve6t = torch.from_numpy(curve6)


def run_dtw_c1_c2():
    return similaritymeasures.dtw(curve1, curve2)

def run_dtw_c5_c6():
    return similaritymeasures.dtw(curve5, curve6)

def run_dtw_c1_c2t():
    return similaritymeasures.dtwtorch(curve1t, curve2t)

def run_dtw_c5_c6t():
    return similaritymeasures.dtwtorch(curve5t, curve6t)

bnchmks = {
    'dtw_c1_c2': run_dtw_c1_c2,
    'dtw_c5_c6': run_dtw_c5_c6,
    'dtw_c1_c2t': run_dtw_c1_c2t,
    'dtw_c5_c6t': run_dtw_c5_c6t,
}

n_repeats = 50
n_runs = 20

for name, func in bnchmks.items():

    times_list = timeit.repeat(func, repeat=n_repeats, number=n_runs)

    total = sum(times_list)
    avg = total / len(times_list)

    func_result = {
        'times_list': times_list,
        'total': total,
        'avg': avg
    }

    benchmark_result[name] = func_result

result_encoded = json.dumps(benchmark_result)
print(result_encoded)
with open('benchmark.json', 'w') as f:
    # dump the dict itself; dumping result_encoded would double-encode the JSON
    json.dump(benchmark_result, f)
# with open('benchmark_no_jit.json', 'w') as f:
#     json.dump(benchmark_result, f)
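Not part of the commit, but a minimal sketch of how the resulting benchmark.json could be summarised afterwards, assuming the results dict is dumped directly as above:

import json

with open('benchmark.json') as f:
    results = json.load(f)

for name, res in results.items():
    # each entry holds the raw timeit list plus the precomputed total/avg
    print(f"{name}: avg {res['avg']:.4f} s per timeit repeat")

Each timeit repeat covers 20 calls (n_runs above), so dividing avg by 20 gives a rough per-call time.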
135 changes: 135 additions & 0 deletions similaritymeasures/similaritymeasures.py
@@ -1,6 +1,7 @@
from __future__ import division
import numpy as np
from scipy.spatial import distance
import torch
# MIT License
#
# Copyright (c) 2018,2019 Charles Jekel
@@ -797,6 +798,140 @@ def dtw(exp_data, num_data, metric='euclidean', **kwargs):
    return d[-1, -1], d


def dtwtorch(exp_data, num_data):
r"""
Compute the Dynamic Time Warping distance.
This computes a generic Dynamic Time Warping (DTW) distance and follows
the algorithm from [1]_. This can use all distance metrics that are
available in scipy.spatial.distance.cdist.
Parameters
----------
exp_data : array_like
Curve from your experimental data. exp_data is of (M, N) shape, where
M is the number of data points, and N is the number of dimmensions
num_data : array_like
Curve from your numerical data. num_data is of (P, N) shape, where P
is the number of data points, and N is the number of dimmensions
metric : str or callable, optional
The distance metric to use. Default='euclidean'. Refer to the
documentation for scipy.spatial.distance.cdist. Some examples:
'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'wminkowski', 'yule'.
**kwargs : dict, optional
Extra arguments to `metric`: refer to each metric documentation in
scipy.spatial.distance.
Some examples:
p : scalar
The p-norm to apply for Minkowski, weighted and unweighted.
Default: 2.
w : ndarray
The weight vector for metrics that support weights (e.g.,
Minkowski).
V : ndarray
The variance vector for standardized Euclidean.
Default: var(vstack([XA, XB]), axis=0, ddof=1)
VI : ndarray
The inverse of the covariance matrix for Mahalanobis.
Default: inv(cov(vstack([XA, XB].T))).T
out : ndarray
The output array
If not None, the distance matrix Y is stored in this array.
Returns
-------
r : float
DTW distance.
d : ndarray (2-D)
Cumulative distance matrix
Notes
-----
The DTW distance is d[-1, -1].
This has O(M, P) computational cost.
The latest scipy.spatial.distance.cdist information can be found at
https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html
Your x locations of data points should be exp_data[:, 0], and the y
locations of the data points should be exp_data[:, 1]. Same for num_data.
This uses the euclidean distance for now. In the future it should be
possible to support other metrics.
DTW is a non-metric distance, which means DTW doesn't hold the triangle
inequality.
https://en.wikipedia.org/wiki/Triangle_inequality
References
----------
.. [1] Senin, P., 2008. Dynamic time warping algorithm review. Information
and Computer Science Department University of Hawaii at Manoa Honolulu,
USA, 855, pp.1-23.
http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf
Examples
--------
>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r, d = dtw(exp_data, num_data)
The euclidean distance is used by default. You can use metric and **kwargs
to specify different types of distance metrics. The following example uses
the city block or Manhattan distance between points.
>>> r, d = dtw(exp_data, num_data, metric='cityblock')
"""
    c = torch.cdist(exp_data, num_data)

    # match the dtype/device of the cost matrix (torch.zeros alone would
    # default to float32 even for double-precision inputs)
    d = torch.zeros_like(c)
    d[0, 0] = c[0, 0]
    n, m = c.shape
    for i in range(1, n):
        d[i, 0] = d[i-1, 0] + c[i, 0]
    for j in range(1, m):
        d[0, j] = d[0, j-1] + c[0, j]
    for i in range(1, n):
        for j in range(1, m):
            # d[i, j] = c[i, j] + torch.stack(
            #     [d[i-1, j], d[i, j-1], d[i-1, j-1]]
            # ).min()
            d[i, j] = c[i, j] + min((d[i-1, j], d[i, j-1], d[i-1, j-1]))
    return d[-1, -1], d


# dtwtorch = torch.jit.trace(
# dtwtorch,
# (
# # torch.randn(3, 2), torch.randn(3, 2),
# # torch.randn(10, 2), torch.randn(100, 2),
# torch.randn(10, 20), torch.randn(15, 20),

# )
# )


def dtw_path(d):
r"""
Calculates the optimal DTW path from a given DTW cumulative distance
15 changes: 15 additions & 0 deletions tests/tests.py
@@ -1,4 +1,5 @@
import numpy as np
import torch
import unittest
import similaritymeasures
from scipy.spatial.distance import cdist
@@ -27,6 +28,8 @@
P = np.array([[0, 0], [1, 1], [2, 2]])
Q = P.copy()
Q[:, 1] = Q[:, 1] + 1
Pt = torch.from_numpy(P).double()
Qt = torch.from_numpy(Q).double()

r1 = 10
r2 = 100
@@ -37,6 +40,8 @@
y2 = np.sin(theta)*r2
curve5 = np.array((x1, y1)).T
curve6 = np.array((x2, y2)).T
curve5t = torch.from_numpy(curve5)
curve6t = torch.from_numpy(curve6)


class TestEverything(unittest.TestCase):
@@ -95,10 +100,20 @@ def test_P_Q_dtw(self):
        r, _ = similaritymeasures.dtw(P, Q)
        self.assertTrue(r, 3.0)

    def test_P_Q_dtw_torch(self):
        r, _ = similaritymeasures.dtwtorch(Pt, Qt)
        self.assertTrue(np.isclose(r, 3.0))

    def test_c5_c6_dtw(self):
        r, _ = similaritymeasures.dtw(curve5, curve6)
        self.assertTrue(np.isclose(r, 9000.0))

    def test_c5_c6_dtw_torch(self):
        r, _ = similaritymeasures.dtwtorch(curve5t, curve6t)
        self.assertTrue(np.isclose(r, 9000.0))

    def test_c5_c6_df(self):
        df = similaritymeasures.frechet_dist(curve5, curve6)
        self.assertTrue(np.isclose(df, 90.0))
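One further test that could be worth having (not in this commit; the test name is hypothetical) is a direct cross-check that the torch port agrees with the NumPy implementation on random curves, since both use the Euclidean point-to-point distance:

    def test_dtw_matches_dtwtorch_random(self):
        # hypothetical cross-check: both implementations should agree
        a = np.random.random((25, 2))
        b = np.random.random((30, 2))
        r_np, _ = similaritymeasures.dtw(a, b)
        r_t, _ = similaritymeasures.dtwtorch(torch.from_numpy(a),
                                             torch.from_numpy(b))
        self.assertTrue(np.isclose(r_np, float(r_t)))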
