Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def get_docstring_args(fd, file_name, func_name, class_name=None):
Extract docstring parameters from function definition
"""
docstring = ast.get_docstring(fd)
if len(re.findall(r"Parameters", docstring)) != 1:
if docstring is None or len(re.findall(r"Parameters", docstring)) != 1:
msg = "Missing required 'Parameters' section in docstring in \n"
msg += f"file: {file_name}\n"
if class_name is not None:
Expand Down
37 changes: 6 additions & 31 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
from numba import cuda, njit, prange
from scipy import linalg
from scipy.ndimage import maximum_filter1d, minimum_filter1d
from scipy.signal import convolve
from scipy.spatial.distance import cdist

from . import config
from . import config, sdp

try:
from numba.cuda.cudadrv.driver import _raise_driver_not_found
Expand Down Expand Up @@ -652,7 +651,8 @@ def check_window_size(m, max_size=None, n=None):
@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _sliding_dot_product(Q, T):
"""
A Numba JIT-compiled implementation of the sliding window dot product.
A wrapper for numba JIT-compiled implementation of
the sliding dot product.

Parameters
----------
Expand All @@ -667,18 +667,12 @@ def _sliding_dot_product(Q, T):
out : numpy.ndarray
Sliding dot product between `Q` and `T`.
"""
m = Q.shape[0]
l = T.shape[0] - m + 1
out = np.empty(l)
for i in range(l):
out[i] = np.dot(Q, T[i : i + m])

return out
return sdp._njit_sliding_dot_product(Q, T)


def sliding_dot_product(Q, T):
"""
Use FFT convolution to calculate the sliding window dot product.
A wrapper for convolution implementation of the sliding dot product.

Parameters
----------
Expand All @@ -692,27 +686,8 @@ def sliding_dot_product(Q, T):
-------
output : numpy.ndarray
Sliding dot product between `Q` and `T`.

Notes
-----
Calculate the sliding dot product

`DOI: 10.1109/ICDM.2016.0179 \
<https://www.cs.ucr.edu/~eamonn/PID4481997_extend_Matrix%20Profile_I.pdf>`__

See Table I, Figure 4

Following the inverse FFT, Fig. 4 states that only cells [m-1:n]
contain valid dot products

Padding is done automatically in fftconvolve step
"""
n = T.shape[0]
m = Q.shape[0]
Qr = np.flipud(Q) # Reverse/flip Q
QT = convolve(Qr, T)

return QT.real[m - 1 : n]
return sdp._convolve_sliding_dot_product(Q, T)


@njit(
Expand Down
101 changes: 101 additions & 0 deletions stumpy/sdp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import numpy as np
from numba import njit
from scipy.fft import next_fast_len
from scipy.fft._pocketfft.basic import c2r, r2c
from scipy.signal import convolve

from . import config


@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _njit_sliding_dot_product(Q, T):
"""
A Numba JIT-compiled implementation of the sliding dot product.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence

T : numpy.ndarray
Time series or sequence

Returns
-------
out : numpy.ndarray
Sliding dot product between `Q` and `T`.
"""
m = len(Q)
l = T.shape[0] - m + 1
out = np.empty(l)
for i in range(l):
result = 0.0
for j in range(m):
result += Q[j] * T[i + j]
out[i] = result

return out


def _convolve_sliding_dot_product(Q, T):
"""
Use FFT or direct convolution to calculate the sliding dot product.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence

T : numpy.ndarray
Time series or sequence

Returns
-------
output : numpy.ndarray
Sliding dot product between `Q` and `T`.

Notes
-----
Calculate the sliding dot product

`DOI: 10.1109/ICDM.2016.0179 \
<https://www.cs.ucr.edu/~eamonn/PID4481997_extend_Matrix%20Profile_I.pdf>`__

See Table I, Figure 4
"""
# mode='valid' returns output of convolution where the two
# sequences fully overlap.

return convolve(np.flipud(Q), T, mode="valid")


def _pocketfft_sliding_dot_product(Q, T):
"""
Use scipy.fft._pocketfft to compute
the sliding dot product.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence

T : numpy.ndarray
Time series or sequence

Returns
-------
output : numpy.ndarray
Sliding dot product between `Q` and `T`.
"""
n = len(T)
m = len(Q)
next_fast_n = next_fast_len(n, real=True)

tmp = np.empty((2, next_fast_n))
tmp[0, :m] = Q[::-1]
tmp[0, m:] = 0.0
tmp[1, :n] = T
tmp[1, n:] = 0.0
fft_2d = r2c(True, tmp, axis=-1)

return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n]
174 changes: 174 additions & 0 deletions tests/test_sdp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import inspect
import warnings
from operator import eq, lt

import numpy as np
import pytest
from numpy import testing as npt
from scipy.fft import next_fast_len

from stumpy import sdp

# README
# Real FFT algorithm performs more efficiently when the length
# of the input array `arr` is composed of small prime factors.
# The next_fast_len(arr, real=True) function from Scipy returns
# the same length if len(arr) is composed of a subset of
# prime numbers 2, 3, 5. Therefore, these radices are
# considered as the most efficient for the real FFT algorithm.

# To ensure that the tests cover different cases, the following cases
# are considered:
# 1. len(T) is even, and len(T) == next_fast_len(len(T), real=True)
# 2. len(T) is odd, and len(T) == next_fast_len(len(T), real=True)
# 3. len(T) is even, and len(T) < next_fast_len(len(T), real=True)
# 4. len(T) is odd, and len(T) < next_fast_len(len(T), real=True)
# And 5. a special case of 1, where len(T) is power of 2.

# Therefore:
# 1. len(T) is composed of 2 and a subset of {3, 5}
# 2. len(T) is composed of a subset of {3, 5}
# 3. len(T) is composed of a subset of {7, 11, 13, ...} and 2
# 4. len(T) is composed of a subset of {7, 11, 13, ...}
# 5. len(T) is power of 2

# In some cases, the prime factors are raised to a power of
# certain degree to increase the length of array to be around
# 1000-2000. This allows us to test sliding_dot_product for
# wider range of query lengths.

test_inputs = [
# Input format:
# (
# len(T),
# remainder, # from `len(T) % 2`
# comparator, # for len(T) comparator next_fast_len(len(T), real=True)
# )
(
2 * (3**2) * (5**3),
0,
eq,
), # = 2250, Even `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`
(
(3**2) * (5**3),
1,
eq,
), # = 1125, Odd `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`.
(
2 * 7 * 11 * 13,
0,
lt,
), # = 2002, Even `len(T)`, and `len(T) < next_fast_len(len(T), real=True)`
(
7 * 11 * 13,
1,
lt,
), # = 1001, Odd `len(T)`, and `len(T) < next_fast_len(len(T), real=True)`
]


def naive_sliding_dot_product(Q, T):
m = len(Q)
l = T.shape[0] - m + 1
out = np.empty(l)
for i in range(l):
out[i] = np.dot(Q, T[i : i + m])
return out


def get_sdp_functions():
out = []
for func_name, func in inspect.getmembers(sdp, inspect.isfunction):
if func_name.endswith("sliding_dot_product"):
out.append((func_name, func))

return out


@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs)
def test_remainder(n_T, remainder, comparator):
assert n_T % 2 == remainder


@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs)
def test_comparator(n_T, remainder, comparator):
shape = next_fast_len(n_T, real=True)
assert comparator(n_T, shape)


@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs)
def test_sdp(n_T, remainder, comparator):
# test_sdp for cases 1-4

n_Q_prime = [
2,
3,
5,
7,
11,
13,
17,
19,
23,
29,
31,
37,
41,
43,
47,
53,
59,
61,
67,
71,
73,
79,
83,
89,
97,
]
n_Q_power2 = [2, 4, 8, 16, 32, 64]
n_Q_values = n_Q_prime + n_Q_power2 + [n_T]
n_Q_values = sorted(n_Q for n_Q in set(n_Q_values) if n_Q <= n_T)

# utils.import_sdp_mods()
for n_Q in n_Q_values:
Q = np.random.rand(n_Q)
T = np.random.rand(n_T)
ref = naive_sliding_dot_product(Q, T)
for func_name, func in get_sdp_functions():
try:
comp = func(Q, T)
npt.assert_allclose(comp, ref)
except Exception as e: # pragma: no cover
msg = f"Error in {func_name}, with n_Q={n_Q} and n_T={n_T}"
warnings.warn(msg)
raise e

return


def test_sdp_power2():
# test for case 5. len(T) is power of 2
pmin = 3
pmax = 13

for func_name, func in get_sdp_functions():
try:
for q in range(pmin, pmax + 1):
n_Q = 2**q
for p in range(q, pmax + 1):
n_T = 2**p
Q = np.random.rand(n_Q)
T = np.random.rand(n_T)

ref = naive_sliding_dot_product(Q, T)
comp = func(Q, T)
npt.assert_allclose(comp, ref)

except Exception as e: # pragma: no cover
msg = f"Error in {func_name}, with q={q} and p={p}"
warnings.warn(msg)
raise e

return