From 3919b53d29a9161293d95c9a268a0e34973fe108 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 21 Jan 2026 22:56:42 -0500 Subject: [PATCH 1/4] revise sliding dot product functions --- stumpy/core.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4f286f706..67af29378 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -652,7 +652,7 @@ def check_window_size(m, max_size=None, n=None): @njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _sliding_dot_product(Q, T): """ - A Numba JIT-compiled implementation of the sliding window dot product. + A Numba JIT-compiled implementation of the sliding dot product. Parameters ---------- @@ -667,18 +667,21 @@ def _sliding_dot_product(Q, T): out : numpy.ndarray Sliding dot product between `Q` and `T`. """ - m = Q.shape[0] + m = len(Q) l = T.shape[0] - m + 1 out = np.empty(l) for i in range(l): - out[i] = np.dot(Q, T[i : i + m]) + result = 0.0 + for j in range(m): + result += Q[j] * T[i + j] + out[i] = result return out def sliding_dot_product(Q, T): """ - Use FFT convolution to calculate the sliding window dot product. + Use FFT or direct convolution to calculate the sliding dot product. Parameters ---------- @@ -701,18 +704,11 @@ def sliding_dot_product(Q, T): `__ See Table I, Figure 4 - - Following the inverse FFT, Fig. 4 states that only cells [m-1:n] - contain valid dot products - - Padding is done automatically in fftconvolve step """ - n = T.shape[0] - m = Q.shape[0] - Qr = np.flipud(Q) # Reverse/flip Q - QT = convolve(Qr, T) + # mode='valid' returns output of convolution where the two + # sequences fully overlap. - return QT.real[m - 1 : n] + return convolve(np.flipud(Q), T, mode="valid") @njit( From 3c70d4774a4c8be0ea2b799fdcc7bcda40b925a0 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 23 Jan 2026 21:12:31 -0500 Subject: [PATCH 2/4] Moved functions to sdp module --- stumpy/core.py | 33 +++++-------------------- stumpy/sdp.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 stumpy/sdp.py diff --git a/stumpy/core.py b/stumpy/core.py index 43f3f9b01..1b3d99c21 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -12,10 +12,9 @@ from numba import cuda, njit, prange from scipy import linalg from scipy.ndimage import maximum_filter1d, minimum_filter1d -from scipy.signal import convolve from scipy.spatial.distance import cdist -from . import config +from . import config, sdp try: from numba.cuda.cudadrv.driver import _raise_driver_not_found @@ -652,7 +651,8 @@ def check_window_size(m, max_size=None, n=None): @njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _sliding_dot_product(Q, T): """ - A Numba JIT-compiled implementation of the sliding dot product. + A wrapper for numba JIT-compiled implementation of + the sliding dot product. Parameters ---------- @@ -667,21 +667,12 @@ def _sliding_dot_product(Q, T): out : numpy.ndarray Sliding dot product between `Q` and `T`. """ - m = len(Q) - l = T.shape[0] - m + 1 - out = np.empty(l) - for i in range(l): - result = 0.0 - for j in range(m): - result += Q[j] * T[i + j] - out[i] = result - - return out + return sdp._njit_sliding_dot_product(Q, T) def sliding_dot_product(Q, T): """ - Use FFT or direct convolution to calculate the sliding dot product. + A wrapper for convolution implementation of the sliding dot product. Parameters ---------- @@ -695,20 +686,8 @@ def sliding_dot_product(Q, T): ------- output : numpy.ndarray Sliding dot product between `Q` and `T`. - - Notes - ----- - Calculate the sliding dot product - - `DOI: 10.1109/ICDM.2016.0179 \ - `__ - - See Table I, Figure 4 """ - # mode='valid' returns output of convolution where the two - # sequences fully overlap. - - return convolve(np.flipud(Q), T, mode="valid") + return sdp._convolve_sliding_dot_product(Q, T) @njit( diff --git a/stumpy/sdp.py b/stumpy/sdp.py new file mode 100644 index 000000000..f48a0d00a --- /dev/null +++ b/stumpy/sdp.py @@ -0,0 +1,67 @@ +import numpy as np +from numba import njit +from scipy.signal import convolve + +from . import config + + +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) +def _njit_sliding_dot_product(Q, T): + """ + A Numba JIT-compiled implementation of the sliding dot product. + + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence + + T : numpy.ndarray + Time series or sequence + + Returns + ------- + out : numpy.ndarray + Sliding dot product between `Q` and `T`. + """ + m = len(Q) + l = T.shape[0] - m + 1 + out = np.empty(l) + for i in range(l): + result = 0.0 + for j in range(m): + result += Q[j] * T[i + j] + out[i] = result + + return out + + +def _convolve_sliding_dot_product(Q, T): + """ + Use FFT or direct convolution to calculate the sliding dot product. + + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence + + T : numpy.ndarray + Time series or sequence + + Returns + ------- + output : numpy.ndarray + Sliding dot product between `Q` and `T`. + + Notes + ----- + Calculate the sliding dot product + + `DOI: 10.1109/ICDM.2016.0179 \ + `__ + + See Table I, Figure 4 + """ + # mode='valid' returns output of convolution where the two + # sequences fully overlap. + + return convolve(np.flipud(Q), T, mode="valid") From 203869e1e4bf1a7dd3c3753a946a7c9de3d45ad1 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 24 Jan 2026 00:40:49 -0500 Subject: [PATCH 3/4] add sdp functions and their tests --- stumpy/sdp.py | 34 +++++++++ tests/test_sdp.py | 174 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 tests/test_sdp.py diff --git a/stumpy/sdp.py b/stumpy/sdp.py index f48a0d00a..696a7f971 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -1,5 +1,7 @@ import numpy as np from numba import njit +from scipy.fft import next_fast_len +from scipy.fft._pocketfft.basic import c2r, r2c from scipy.signal import convolve from . import config @@ -65,3 +67,35 @@ def _convolve_sliding_dot_product(Q, T): # sequences fully overlap. return convolve(np.flipud(Q), T, mode="valid") + + +def _pocketfft_sliding_dot_product(Q, T): + """ + Use scipy.fft._pocketfft to compute + the sliding dot product. + + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence + + T : numpy.ndarray + Time series or sequence + + Returns + ------- + output : numpy.ndarray + Sliding dot product between `Q` and `T`. + """ + n = len(T) + m = len(Q) + next_fast_n = next_fast_len(n, real=True) + + tmp = np.empty((2, next_fast_n)) + tmp[0, :m] = Q[::-1] + tmp[0, m:] = 0.0 + tmp[1, :n] = T + tmp[1, n:] = 0.0 + fft_2d = r2c(True, tmp, axis=-1) + + return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] diff --git a/tests/test_sdp.py b/tests/test_sdp.py new file mode 100644 index 000000000..8c9b8fd9b --- /dev/null +++ b/tests/test_sdp.py @@ -0,0 +1,174 @@ +import inspect +import warnings +from operator import eq, lt + +import numpy as np +import pytest +from numpy import testing as npt +from scipy.fft import next_fast_len + +from stumpy import sdp + +# README +# Real FFT algorithm performs more efficiently when the length +# of the input array `arr` is composed of small prime factors. +# The next_fast_len(arr, real=True) function from Scipy returns +# the same length if len(arr) is composed of a subset of +# prime numbers 2, 3, 5. Therefore, these radices are +# considered as the most efficient for the real FFT algorithm. + +# To ensure that the tests cover different cases, the following cases +# are considered: +# 1. len(T) is even, and len(T) == next_fast_len(len(T), real=True) +# 2. len(T) is odd, and len(T) == next_fast_len(len(T), real=True) +# 3. len(T) is even, and len(T) < next_fast_len(len(T), real=True) +# 4. len(T) is odd, and len(T) < next_fast_len(len(T), real=True) +# And 5. a special case of 1, where len(T) is power of 2. + +# Therefore: +# 1. len(T) is composed of 2 and a subset of {3, 5} +# 2. len(T) is composed of a subset of {3, 5} +# 3. len(T) is composed of a subset of {7, 11, 13, ...} and 2 +# 4. len(T) is composed of a subset of {7, 11, 13, ...} +# 5. len(T) is power of 2 + +# In some cases, the prime factors are raised to a power of +# certain degree to increase the length of array to be around +# 1000-2000. This allows us to test sliding_dot_product for +# wider range of query lengths. + +test_inputs = [ + # Input format: + # ( + # len(T), + # remainder, # from `len(T) % 2` + # comparator, # for len(T) comparator next_fast_len(len(T), real=True) + # ) + ( + 2 * (3**2) * (5**3), + 0, + eq, + ), # = 2250, Even `len(T)`, and `len(T) == next_fast_len(len(T), real=True)` + ( + (3**2) * (5**3), + 1, + eq, + ), # = 1125, Odd `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`. + ( + 2 * 7 * 11 * 13, + 0, + lt, + ), # = 2002, Even `len(T)`, and `len(T) < next_fast_len(len(T), real=True)` + ( + 7 * 11 * 13, + 1, + lt, + ), # = 1001, Odd `len(T)`, and `len(T) < next_fast_len(len(T), real=True)` +] + + +def naive_sliding_dot_product(Q, T): + m = len(Q) + l = T.shape[0] - m + 1 + out = np.empty(l) + for i in range(l): + out[i] = np.dot(Q, T[i : i + m]) + return out + + +def get_sdp_functions(): + out = [] + for func_name, func in inspect.getmembers(sdp, inspect.isfunction): + if func_name.endswith("sliding_dot_product"): + out.append((func_name, func)) + + return out + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_remainder(n_T, remainder, comparator): + assert n_T % 2 == remainder + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_comparator(n_T, remainder, comparator): + shape = next_fast_len(n_T, real=True) + assert comparator(n_T, shape) + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_sdp(n_T, remainder, comparator): + # test_sdp for cases 1-4 + + n_Q_prime = [ + 2, + 3, + 5, + 7, + 11, + 13, + 17, + 19, + 23, + 29, + 31, + 37, + 41, + 43, + 47, + 53, + 59, + 61, + 67, + 71, + 73, + 79, + 83, + 89, + 97, + ] + n_Q_power2 = [2, 4, 8, 16, 32, 64] + n_Q_values = n_Q_prime + n_Q_power2 + [n_T] + n_Q_values = sorted(n_Q for n_Q in set(n_Q_values) if n_Q <= n_T) + + # utils.import_sdp_mods() + for n_Q in n_Q_values: + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + ref = naive_sliding_dot_product(Q, T) + for func_name, func in get_sdp_functions(): + try: + comp = func(Q, T) + npt.assert_allclose(comp, ref) + except Exception as e: # pragma: no cover + msg = f"Error in {func_name}, with n_Q={n_Q} and n_T={n_T}" + warnings.warn(msg) + raise e + + return + + +def test_sdp_power2(): + # test for case 5. len(T) is power of 2 + pmin = 3 + pmax = 13 + + for func_name, func in get_sdp_functions(): + try: + for q in range(pmin, pmax + 1): + n_Q = 2**q + for p in range(q, pmax + 1): + n_T = 2**p + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + + ref = naive_sliding_dot_product(Q, T) + comp = func(Q, T) + npt.assert_allclose(comp, ref) + + except Exception as e: # pragma: no cover + msg = f"Error in {func_name}, with q={q} and p={p}" + warnings.warn(msg) + raise e + + return From a2b5d4ed7e36f7587e99b7b6486d4db90a3634e5 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 24 Jan 2026 00:41:46 -0500 Subject: [PATCH 4/4] minor fix for cases without any docstring --- docstring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docstring.py b/docstring.py index e16a92d81..3c72633c9 100755 --- a/docstring.py +++ b/docstring.py @@ -10,7 +10,7 @@ def get_docstring_args(fd, file_name, func_name, class_name=None): Extract docstring parameters from function definition """ docstring = ast.get_docstring(fd) - if len(re.findall(r"Parameters", docstring)) != 1: + if docstring is None or len(re.findall(r"Parameters", docstring)) != 1: msg = "Missing required 'Parameters' section in docstring in \n" msg += f"file: {file_name}\n" if class_name is not None: