From 404cf26d9cf13eae2b29a40c3391f074d20d2abc Mon Sep 17 00:00:00 2001 From: sw241395 Date: Sat, 20 May 2023 15:44:54 +0100 Subject: [PATCH 1/3] Updated deprecated np.int to np.int_ --- requirements.txt | 2 +- saxpy/hotsax.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8585981..dba5fca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ numpy pytest pytest-cov codecov -sklearn +scikit-learn scipy diff --git a/saxpy/hotsax.py b/saxpy/hotsax.py index 3de164e..1a9c7e3 100644 --- a/saxpy/hotsax.py +++ b/saxpy/hotsax.py @@ -54,7 +54,7 @@ def find_best_discord_hotsax(series, win_size, global_registry, sax_data, magic_ distance_calls = 0 - visit_array = np.zeros(len(series), dtype=np.int) + visit_array = np.zeros(len(series), dtype=np.int_) """[4.0] and we are off iterating over the magic array entries""" for entry in magic_array: From 24c3ba73642e2ad54fbad911032ae9eb849c29bc Mon Sep 17 00:00:00 2001 From: sw241395 Date: Sat, 20 May 2023 17:06:37 +0100 Subject: [PATCH 2/3] Updating the number of SAX sections from 20 to 26 --- saxpy/alphabet.py | 118 ++++++++++------------------------------ saxpy/strfunc.py | 2 +- tests/test_cuts.py | 2 +- tests/test_str.py | 3 +- tests/test_ts2string.py | 21 +++++-- 5 files changed, 47 insertions(+), 99 deletions(-) diff --git a/saxpy/alphabet.py b/saxpy/alphabet.py index 2bec0ac..b3f3720 100644 --- a/saxpy/alphabet.py +++ b/saxpy/alphabet.py @@ -1,98 +1,36 @@ """Implements Alphabet cuts.""" import numpy as np - +import scipy def cuts_for_asize(a_size): - """Generate a set of alphabet cuts for its size.""" - """ Typically, we generate cuts in R as follows: + """Generate a set of alphabet cuts for its size. 
+ + Typically, we generate cuts in R as follows: get_cuts_for_num <- function(num) { - cuts = c(-Inf) - for (i in 1:(num-1)) { - cuts = c(cuts, qnorm(i * 1/num)) - } - cuts + cuts = c(-Inf) + for (i in 1:(num-1)) { + cuts = c(cuts, qnorm(i * 1/num)) + } + cuts } - get_cuts_for_num(3) """ - options = { - 2: np.array([-np.inf, 0.00]), - 3: np.array([-np.inf, -0.4307273, 0.4307273]), - 4: np.array([-np.inf, -0.6744898, 0, 0.6744898]), - 5: np.array([-np.inf, -0.841621233572914, -0.2533471031358, - 0.2533471031358, 0.841621233572914]), - 6: np.array([-np.inf, -0.967421566101701, -0.430727299295457, 0, - 0.430727299295457, 0.967421566101701]), - 7: np.array([-np.inf, -1.06757052387814, -0.565948821932863, - -0.180012369792705, 0.180012369792705, 0.565948821932863, - 1.06757052387814]), - 8: np.array([-np.inf, -1.15034938037601, -0.674489750196082, - -0.318639363964375, 0, 0.318639363964375, - 0.674489750196082, 1.15034938037601]), - 9: np.array([-np.inf, -1.22064034884735, -0.764709673786387, - -0.430727299295457, -0.139710298881862, 0.139710298881862, - 0.430727299295457, 0.764709673786387, 1.22064034884735]), - 10: np.array([-np.inf, -1.2815515655446, -0.841621233572914, - -0.524400512708041, -0.2533471031358, 0, 0.2533471031358, - 0.524400512708041, 0.841621233572914, 1.2815515655446]), - 11: np.array([-np.inf, -1.33517773611894, -0.908457868537385, - -0.604585346583237, -0.348755695517045, - -0.114185294321428, 0.114185294321428, 0.348755695517045, - 0.604585346583237, 0.908457868537385, 1.33517773611894]), - 12: np.array([-np.inf, -1.38299412710064, -0.967421566101701, - -0.674489750196082, -0.430727299295457, - -0.210428394247925, 0, 0.210428394247925, - 0.430727299295457, 0.674489750196082, 0.967421566101701, - 1.38299412710064]), - 13: np.array([-np.inf, -1.42607687227285, -1.0200762327862, - -0.736315917376129, -0.502402223373355, - -0.293381232121193, -0.0965586152896391, - 0.0965586152896394, 0.293381232121194, 0.502402223373355, - 0.73631591737613, 
1.0200762327862, 1.42607687227285]), - 14: np.array([-np.inf, -1.46523379268552, -1.06757052387814, - -0.791638607743375, -0.565948821932863, -0.36610635680057, - -0.180012369792705, 0, 0.180012369792705, - 0.36610635680057, 0.565948821932863, 0.791638607743375, - 1.06757052387814, 1.46523379268552]), - 15: np.array([-np.inf, -1.50108594604402, -1.11077161663679, - -0.841621233572914, -0.622925723210088, - -0.430727299295457, -0.2533471031358, -0.0836517339071291, - 0.0836517339071291, 0.2533471031358, 0.430727299295457, - 0.622925723210088, 0.841621233572914, 1.11077161663679, - 1.50108594604402]), - 16: np.array([-np.inf, -1.53412054435255, -1.15034938037601, - -0.887146559018876, -0.674489750196082, - -0.488776411114669, -0.318639363964375, - -0.157310684610171, 0, 0.157310684610171, - 0.318639363964375, 0.488776411114669, 0.674489750196082, - 0.887146559018876, 1.15034938037601, 1.53412054435255]), - 17: np.array([-np.inf, -1.5647264713618, -1.18683143275582, - -0.928899491647271, -0.721522283982343, - -0.541395085129088, -0.377391943828554, - -0.223007830940367, -0.0737912738082727, - 0.0737912738082727, 0.223007830940367, 0.377391943828554, - 0.541395085129088, 0.721522283982343, 0.928899491647271, - 1.18683143275582, 1.5647264713618]), - 18: np.array([-np.inf, -1.59321881802305, -1.22064034884735, - -0.967421566101701, -0.764709673786387, - -0.589455797849779, -0.430727299295457, - -0.282216147062508, -0.139710298881862, 0, - 0.139710298881862, 0.282216147062508, 0.430727299295457, - 0.589455797849779, 0.764709673786387, 0.967421566101701, - 1.22064034884735, 1.59321881802305]), - 19: np.array([-np.inf, -1.61985625863827, -1.25211952026522, - -1.00314796766253, -0.8045963803603, -0.633640000779701, - -0.47950565333095, -0.336038140371823, -0.199201324789267, - -0.0660118123758407, 0.0660118123758406, - 0.199201324789267, 0.336038140371823, 0.47950565333095, - 0.633640000779701, 0.8045963803603, 1.00314796766253, - 1.25211952026522, 1.61985625863827]), - 20: 
np.array([-np.inf, -1.64485362695147, -1.2815515655446, - -1.03643338949379, -0.841621233572914, -0.674489750196082, - -0.524400512708041, -0.385320466407568, -0.2533471031358, - -0.125661346855074, 0, 0.125661346855074, 0.2533471031358, - 0.385320466407568, 0.524400512708041, 0.674489750196082, - 0.841621233572914, 1.03643338949379, 1.2815515655446, - 1.64485362695147]), - } + get_cuts_for_num(3) + + Args: + a_size (int): Number of sections to cut into + (Must be between 2 and 26) + + Returns: + numpy.array: Numpy array of the threshold of the cuts + """ + + # Limit to 26 sections, as there are only 26 letters in the alphabet + if a_size > 26 or a_size < 2: + raise ValueError("'a_size must be between 2 and 26'") + + break_points = [-np.inf] - return options[a_size] + for i in range(1, a_size): + break_points.append(scipy.stats.norm.ppf(i/a_size)) + + return np.array(break_points) diff --git a/saxpy/strfunc.py b/saxpy/strfunc.py index 724b36e..181dc51 100644 --- a/saxpy/strfunc.py +++ b/saxpy/strfunc.py @@ -3,7 +3,7 @@ def idx2letter(idx): """Convert a numerical index to a char.""" - if 0 <= idx < 20: + if 0 <= idx < 26: return chr(97 + idx) else: raise ValueError('A wrong idx value supplied.') diff --git a/tests/test_cuts.py b/tests/test_cuts.py index d13935f..ee0ac0c 100644 --- a/tests/test_cuts.py +++ b/tests/test_cuts.py @@ -4,5 +4,5 @@ def test_sizing(): """Test alphabet sizes.""" - for s in range(2, 20): + for s in range(2, 26): assert len(alphabet.cuts_for_asize(s)) == s diff --git a/tests/test_str.py b/tests/test_str.py index e5883d5..68f4ed1 100644 --- a/tests/test_str.py +++ b/tests/test_str.py @@ -8,9 +8,10 @@ def test_sizing(): assert 'a' == idx2letter(0) assert 'h' == idx2letter(7) assert 't' == idx2letter(19) + assert 'z' == idx2letter(25) with pytest.raises(ValueError, match=r'.* idx'): idx2letter(-1) with pytest.raises(ValueError, match=r'.* idx .*'): - idx2letter(20) + idx2letter(26) diff --git a/tests/test_ts2string.py 
b/tests/test_ts2string.py index 1be26f0..5164ad3 100644 --- a/tests/test_ts2string.py +++ b/tests/test_ts2string.py @@ -6,18 +6,27 @@ def test_stringing(): """Test string conversion.""" - # 11: np.array([-np.inf, -1.33517773611894, -0.908457868537385, - # -0.604585346583237, -0.348755695517045, - # -0.114185294321428, 0.114185294321428, 0.348755695517045, - # 0.604585346583237, 0.908457868537385, 1.33517773611894]), - ab = sax.ts_to_string(np.array([-1.33517773611895, -1.33517773611894]), + # 11: np.array([-inf, -1.33517774, -0.90845787, -0.60458535, + # -0.3487557, -0.11418529, 0.11418529, 0.3487557, + # 0.60458535, 0.90845787, 1.33517774]), + + ab = sax.ts_to_string(np.array([-1.33517775 , -1.33517773]), alphabet.cuts_for_asize(11)) assert 'ab' == ab - kj = sax.ts_to_string(np.array([1.33517773611895, 1.33517773611894]), + kj = sax.ts_to_string(np.array([1.33517775, 1.33517773]), alphabet.cuts_for_asize(11)) assert 'kj' == kj + # Test to handle cuts of size 26 + print(alphabet.cuts_for_asize(26)) + + yz = sax.ts_to_string(np.array([1.76882503, 1.76882505]), + alphabet.cuts_for_asize(26)) + + assert 'yz' == yz + + def test_mindist(): """Test MINDIST.""" From 42c9fbb37dbb9d932d1756fe0370f73ab28980e7 Mon Sep 17 00:00:00 2001 From: sw241395 Date: Sat, 27 May 2023 16:52:19 +0100 Subject: [PATCH 3/3] Updating int_ to int64 --- saxpy/hotsax.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saxpy/hotsax.py b/saxpy/hotsax.py index 1a9c7e3..d73eb95 100644 --- a/saxpy/hotsax.py +++ b/saxpy/hotsax.py @@ -54,7 +54,7 @@ def find_best_discord_hotsax(series, win_size, global_registry, sax_data, magic_ distance_calls = 0 - visit_array = np.zeros(len(series), dtype=np.int_) + visit_array = np.zeros(len(series), dtype=np.int64) """[4.0] and we are off iterating over the magic array entries""" for entry in magic_array: