From 59fb014173b9234efcf729835d14d20b0babcdef Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 19 Jan 2026 07:16:35 -0800 Subject: [PATCH 1/3] Migrate third party tests from unittest to pytest --- .../cupy/binary_tests/test_elementwise.py | 4 +- .../cupy/binary_tests/test_packing.py | 4 +- .../cupy/core_tests/test_array_function.py | 4 +- .../third_party/cupy/core_tests/test_core.py | 9 +- .../cupy/core_tests/test_internal.py | 179 +++++++----------- 5 files changed, 83 insertions(+), 117 deletions(-) diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py index 431f1f27d8c7..95ec1d4374c8 100644 --- a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py +++ b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py @@ -1,11 +1,9 @@ from __future__ import annotations -import unittest - from dpnp.tests.third_party.cupy import testing -class TestElementwise(unittest.TestCase): +class TestElementwise: @testing.for_int_dtypes() @testing.numpy_cupy_array_equal() diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py index a72a8a558b08..518e74d98868 100644 --- a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py +++ b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import numpy import pytest @@ -11,7 +11,7 @@ ) -class TestPacking(unittest.TestCase): +class TestPacking: @testing.for_int_dtypes() @testing.numpy_cupy_array_equal() diff --git a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py index 7878a5a0aaff..f0c45900bcda 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import numpy import pytest @@ -11,7 +11,7 @@ ) -class TestArrayFunction(unittest.TestCase): +class TestArrayFunction: @testing.with_requires("numpy>=1.17.0") def test_array_function(self): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_core.py b/dpnp/tests/third_party/cupy/core_tests/test_core.py index c959b4f24954..d9f8196db562 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_core.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_core.py @@ -1,20 +1,23 @@ from __future__ import annotations import sys -import unittest import numpy import pytest import dpnp as cupy + +# from cupy._core import core from dpnp.tests.third_party.cupy import testing from dpnp.tests.third_party.cupy.testing._protocol_helpers import ( DummyObjectWithCudaArrayInterface, DummyObjectWithCuPyGetNDArray, ) +# from cupy_tests.core_tests import test_raw + -class TestSize(unittest.TestCase): +class TestSize: # def tearDown(self): # # Free huge memory for slow test @@ -58,7 +61,7 @@ def test_size_huge(self, xp): @pytest.mark.skip("no cupy._core submodule") -class TestOrder(unittest.TestCase): +class TestOrder: @testing.for_orders(_orders.keys()) def test_ndarray(self, order): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_internal.py b/dpnp/tests/third_party/cupy/core_tests/test_internal.py index 205661e80d75..2f4239a2e220 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_internal.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_internal.py @@ -1,18 +1,16 @@ -import math -import unittest +from __future__ import annotations import numpy 
import pytest # from cupy._core import internal -from dpnp.tests.third_party.cupy import testing pytest.skip( "CuPy internal functions are not supported", allow_module_level=True ) -class TestProd(unittest.TestCase): +class TestProd: def test_empty(self): assert internal.prod([]) == 1 @@ -24,7 +22,7 @@ def test_two(self): assert internal.prod([2, 3]) == 6 -class TestProdSequence(unittest.TestCase): +class TestProdSequence: def test_empty(self): assert internal.prod_sequence(()) == 1 @@ -74,7 +72,7 @@ def test_float(self): assert internal.get_size(1.0) == (1.0,) -class TestVectorEqual(unittest.TestCase): +class TestVectorEqual: def test_empty(self): assert internal.vector_equal([], []) is True @@ -89,7 +87,7 @@ def test_different_size(self): assert internal.vector_equal([1, 2, 3], [1, 2]) is False -class TestGetCContiguity(unittest.TestCase): +class TestGetCContiguity: def test_zero_in_shape(self): assert internal.get_c_contiguity((1, 0, 1), (1, 1, 1), 3) @@ -122,134 +120,101 @@ def test_no_contiguous3(self): assert not internal.get_c_contiguity((3, 1, 3), (6, 6, 4), 2) -class TestInferUnknownDimension(unittest.TestCase): +class TestInferUnknownDimension: def test_known_all(self): assert internal.infer_unknown_dimension((1, 2, 3), 6) == [1, 2, 3] def test_multiple_unknown(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): internal.infer_unknown_dimension((-1, 1, -1), 10) def test_infer(self): assert internal.infer_unknown_dimension((-1, 2, 3), 12) == [2, 2, 3] -@testing.parameterize( - {"slice": (2, 8, 1), "expect": (2, 8, 1)}, - {"slice": (2, None, 1), "expect": (2, 10, 1)}, - {"slice": (2, 1, 1), "expect": (2, 2, 1)}, - {"slice": (2, -1, 1), "expect": (2, 9, 1)}, - {"slice": (None, 8, 1), "expect": (0, 8, 1)}, - {"slice": (-3, 8, 1), "expect": (7, 8, 1)}, - {"slice": (11, 8, 1), "expect": (10, 10, 1)}, - {"slice": (11, 11, 1), "expect": (10, 10, 1)}, - {"slice": (-11, 8, 1), "expect": (0, 8, 1)}, - {"slice": (-11, -11, 1), "expect": (0, 0, 1)}, - {"slice": (8, 2, -1), "expect": (8, 2, -1)}, - {"slice": (8, None, -1), "expect": (8, -1, -1)}, - {"slice": (8, 9, -1), "expect": (8, 8, -1)}, - {"slice": (8, -3, -1), "expect": (8, 7, -1)}, - {"slice": (None, 8, -1), "expect": (9, 8, -1)}, - {"slice": (-3, 6, -1), "expect": (7, 6, -1)}, - {"slice": (10, 10, -1), "expect": (9, 9, -1)}, - {"slice": (10, 8, -1), "expect": (9, 8, -1)}, - {"slice": (9, 10, -1), "expect": (9, 9, -1)}, - {"slice": (9, 9, -1), "expect": (9, 9, -1)}, - {"slice": (9, 8, -1), "expect": (9, 8, -1)}, - {"slice": (8, 8, -1), "expect": (8, 8, -1)}, - {"slice": (-9, -8, -1), "expect": (1, 1, -1)}, - {"slice": (-9, -9, -1), "expect": (1, 1, -1)}, - {"slice": (-9, -10, -1), "expect": (1, 0, -1)}, - {"slice": (-9, -11, -1), "expect": (1, -1, -1)}, - {"slice": (-9, -12, -1), "expect": (1, -1, -1)}, - {"slice": (-10, -9, -1), "expect": (0, 0, -1)}, - {"slice": (-10, -10, -1), "expect": (0, 0, -1)}, - {"slice": (-10, -11, -1), "expect": (0, -1, -1)}, - {"slice": (-10, -12, -1), "expect": (0, -1, -1)}, - {"slice": (-11, 8, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -9, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -10, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -11, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -12, -1), "expect": (-1, -1, -1)}, +@pytest.mark.parametrize( + ("slice_", "expect"), + [ + ((2, 8, 1), (2, 8, 1)), + ((2, None, 1), (2, 10, 1)), + ((2, 1, 1), (2, 2, 1)), + ((2, -1, 1), (2, 9, 1)), + ((None, 8, 1), (0, 8, 1)), + ((-3, 8, 1), (7, 8, 1)), + ((11, 8, 1), (10, 10, 1)), + 
((11, 11, 1), (10, 10, 1)), + ((-11, 8, 1), (0, 8, 1)), + ((-11, -11, 1), (0, 0, 1)), + ((8, 2, -1), (8, 2, -1)), + ((8, None, -1), (8, -1, -1)), + ((8, 9, -1), (8, 8, -1)), + ((8, -3, -1), (8, 7, -1)), + ((None, 8, -1), (9, 8, -1)), + ((-3, 6, -1), (7, 6, -1)), + ((10, 10, -1), (9, 9, -1)), + ((10, 8, -1), (9, 8, -1)), + ((9, 10, -1), (9, 9, -1)), + ((9, 9, -1), (9, 9, -1)), + ((9, 8, -1), (9, 8, -1)), + ((8, 8, -1), (8, 8, -1)), + ((-9, -8, -1), (1, 1, -1)), + ((-9, -9, -1), (1, 1, -1)), + ((-9, -10, -1), (1, 0, -1)), + ((-9, -11, -1), (1, -1, -1)), + ((-9, -12, -1), (1, -1, -1)), + ((-10, -9, -1), (0, 0, -1)), + ((-10, -10, -1), (0, 0, -1)), + ((-10, -11, -1), (0, -1, -1)), + ((-10, -12, -1), (0, -1, -1)), + ((-11, 8, -1), (-1, -1, -1)), + ((-11, -9, -1), (-1, -1, -1)), + ((-11, -10, -1), (-1, -1, -1)), + ((-11, -11, -1), (-1, -1, -1)), + ((-11, -12, -1), (-1, -1, -1)), + ], ) -class TestCompleteSlice(unittest.TestCase): +def test_complete_slice(slice_, expect): + assert internal.complete_slice(slice(*slice_), 10) == slice(*expect) - def test_complete_slice(self): - assert internal.complete_slice(slice(*self.slice), 10) == slice( - *self.expect - ) - -class TestCompleteSliceError(unittest.TestCase): +class TestCompleteSliceError: def test_invalid_step_value(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): internal.complete_slice(slice(1, 1, 0), 1) def test_invalid_step_type(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice(1, 1, (1, 2)), 1) def test_invalid_start_type(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, 1), 1) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, -1), 1) def test_invalid_stop_type(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, 1), 1) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, -1), 1) -@testing.parameterize( - {"x": 0, "expect": 0}, - {"x": 1, "expect": 1}, - {"x": 2, "expect": 2}, - {"x": 3, "expect": 4}, - {"x": 2**10, "expect": 2**10}, - {"x": 2**10 - 1, "expect": 2**10}, - {"x": 2**10 + 1, "expect": 2**11}, - {"x": 2**40, "expect": 2**40}, - {"x": 2**40 - 1, "expect": 2**40}, - {"x": 2**40 + 1, "expect": 2**41}, +@pytest.mark.parametrize( + ("x", "expect"), + [ + (0, 0), + (1, 1), + (2, 2), + (3, 4), + (2**10, 2**10), + (2**10 - 1, 2**10), + (2**10 + 1, 2**11), + (2**40, 2**40), + (2**40 - 1, 2**40), + (2**40 + 1, 2**41), + ], ) -class TestClp2(unittest.TestCase): - - def test_clp2(self): - assert internal.clp2(self.x) == self.expect - - -@testing.parameterize( - *testing.product( - { - "value": [ - 0.0, - 1.0, - -1.0, - 0.25, - -0.25, - 11.0, - -11.0, - 2**-15, - -(2**-15), # Denormalized Number - float("inf"), - float("-inf"), - ], - } - ) -) -class TestConvertFloat16(unittest.TestCase): - - def test_conversion(self): - half = internal.to_float16(self.value) - assert internal.from_float16(half) == self.value - - -class TestConvertFloat16Nan(unittest.TestCase): - - def test_conversion(self): - half = internal.to_float16(float("nan")) - assert math.isnan(internal.from_float16(half)) +def test_clp2(x, expect): + assert internal.clp2(x) == expect From 7ee68fda2e1ccd2063d53fec62aeafab8ac2df8a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 19 Jan 2026 07:27:59 -0800 Subject: [PATCH 2/3] Update FFT tests 
due to changes in FFT config to make it more thread-safe --- .../third_party/cupy/fft_tests/test_cache.py | 74 +- .../cupy/fft_tests/test_callback.py | 965 +++++++++++++++--- .../third_party/cupy/fft_tests/test_fft.py | 143 ++- 3 files changed, 940 insertions(+), 242 deletions(-) diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py index e32f8e8305a0..29e0a7724edf 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import contextlib import io import queue @@ -14,7 +16,7 @@ # from cupy.cuda import runtime # from cupy.fft import config -# from .test_fft import (multi_gpu_config, _skip_multi_gpu_bug) +# from .test_fft import multi_gpu_config pytest.skip("FFT cache functions are not supported", allow_module_level=True) @@ -30,26 +32,29 @@ def intercept_stdout(func): class TestPlanCache(unittest.TestCase): - def setUp(self): - self.caches = [] - self.old_sizes = [] + @contextlib.contextmanager + @staticmethod + def prepare_and_restore_caches(): + old_sizes = [] for i in range(n_devices): with device.Device(i): cache = config.get_plan_cache() - self.old_sizes.append(cache.get_size()) + old_sizes.append(cache.get_size()) cache.clear() cache.set_memsize(-1) cache.set_size(2) - self.caches.append(cache) - def tearDown(self): - for i in range(n_devices): - with device.Device(i): - cache = config.get_plan_cache() - cache.clear() - cache.set_size(self.old_sizes[i]) - cache.set_memsize(-1) + try: + yield + finally: + for i in range(n_devices): + with device.Device(i): + cache = config.get_plan_cache() + cache.clear() + cache.set_size(old_sizes[i]) + cache.set_memsize(-1) + @prepare_and_restore_caches() def test_LRU_cache1(self): # test if insertion and clean-up works cache = config.get_plan_cache() @@ -62,6 +67,7 @@ def test_LRU_cache1(self): cache.clear() assert cache.get_curr_size() == 0 <= cache.get_size() + @prepare_and_restore_caches() def test_LRU_cache2(self): # test if plan is reused cache = config.get_plan_cache() @@ -83,6 +89,7 @@ def test_LRU_cache2(self): # we should get the same plan assert plan0 is plan1 + @prepare_and_restore_caches() def test_LRU_cache3(self): # test if cache size is limited cache = config.get_plan_cache() @@ -108,6 +115,7 @@ def test_LRU_cache3(self): for _, node in cache: assert plan is not node.plan + @prepare_and_restore_caches() def test_LRU_cache4(self): # test if fetching the plan will reorder it to the top cache = config.get_plan_cache() @@ -149,6 +157,8 @@ def test_LRU_cache4(self): cache[next(iterator)[0]] @testing.multi_gpu(2) + @prepare_and_restore_caches() + @pytest.mark.thread_unsafe(reason="intercepts stdout") def test_LRU_cache5(self): # test if the LRU cache is thread-local @@ -210,10 +220,13 @@ def thread_init_caches(gpus, queue): assert stdout.count("uninitialized") == n_devices - 2 @testing.multi_gpu(2) - def test_LRU_cache6(self): + @prepare_and_restore_caches() + def test_LRU_cache6(self, gpus=None): # test if each device has a separate cache - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -247,10 +260,13 @@ def test_LRU_cache6(self): @pytest.mark.skipif( runtime.is_hip, reason="hipFFT doesn't support multi-GPU" ) - def test_LRU_cache7(self): + 
@prepare_and_restore_caches() + def test_LRU_cache7(self, gpus=None): # test accessing a multi-GPU plan - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -319,6 +335,7 @@ def test_LRU_cache7(self): assert cache0.get_curr_size() == 1 <= cache0.get_size() assert cache1.get_curr_size() == 2 <= cache1.get_size() + @prepare_and_restore_caches() def test_LRU_cache8(self): # test if Plan1d and PlanNd can coexist in the same cache cache = config.get_plan_cache() @@ -340,6 +357,7 @@ def test_LRU_cache8(self): assert isinstance(next(iterator)[1].plan, cufft.PlanNd) assert isinstance(next(iterator)[1].plan, cufft.Plan1d) + @prepare_and_restore_caches() def test_LRU_cache9(self): # test if memsizes in the cache adds up cache = config.get_plan_cache() @@ -358,6 +376,8 @@ def test_LRU_cache9(self): assert memsize == cache.get_curr_memsize() + @prepare_and_restore_caches() + @pytest.mark.thread_unsafe(reason="intercepts stdout") def test_LRU_cache10(self): # test if deletion works and if show_info() is consistent with data cache = config.get_plan_cache() @@ -406,11 +426,13 @@ def test_LRU_cache10(self): @pytest.mark.skipif( runtime.is_hip, reason="hipFFT doesn't support multi-GPU" ) + @prepare_and_restore_caches() def test_LRU_cache11(self): # test if collectively deleting a multi-GPU plan works - _skip_multi_gpu_bug((128,), self.gpus) - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -441,11 +463,14 @@ def test_LRU_cache11(self): @pytest.mark.skipif( runtime.is_hip, reason="hipFFT doesn't support multi-GPU" ) + @prepare_and_restore_caches() def test_LRU_cache12(self): # test if an error is raise when one of the caches is unable # to fit it a multi-GPU plan - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -467,6 +492,7 @@ def test_LRU_cache12(self): runtime.runtimeGetVersion() >= 11080, "CUDA 11.8 has different plan size", ) + @prepare_and_restore_caches() def test_LRU_cache13(self): # test if plan insertion respect the memory size limit cache = config.get_plan_cache() diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py index 12bfd4aa73df..e2682d0aaa6e 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py @@ -1,18 +1,62 @@ +from __future__ import annotations + import contextlib +import os import string import sys import tempfile from unittest import mock +try: + import Cython +except ImportError: + Cython = None +else: + if Cython.__version__ < "0.29.0": + Cython = None import numpy as np import pytest import dpnp as cupy from dpnp.tests.third_party.cupy import testing +# from cupy.cuda import cufft +# from cupy.cuda.device import get_compute_capability + pytest.skip("FFT callbacks are not supported", allow_module_level=True) +def cuda_version(): + return cupy.cuda.runtime.runtimeGetVersion() + + +cb_ver_for_test = ("legacy", "jit") + + +def 
check_should_skip_legacy_test(): + if not sys.platform.startswith("linux"): + pytest.skip("legacy callbacks are only supported on Linux") + if Cython is None: + pytest.skip("no working Cython") + if "LD_PRELOAD" in os.environ: + pytest.skip( + "legacy callback does not work if libcufft.so " "is preloaded" + ) + if cufft.getVersion() >= 12000 and get_compute_capability() == "75": + pytest.skip( + "cuFFT legacy callbacks in CUDA 13.0+ do not support " "cc 7.5" + ) + if cufft.getVersion() == 11303 and get_compute_capability() == "120": + pytest.skip( + "cuFFT legacy callbacks in CUDA 12.8.0 do not support " "cc 12.0" + ) + + +def check_should_skip_jit_test(): + if cufft.getVersion() < 11303: + pytest.skip("JIT callbacks require cuFFT from CUDA 12.8+") + + @contextlib.contextmanager def use_temporary_cache_dir(): target = "cupy.fft._callback.get_cache_dir" @@ -21,45 +65,50 @@ def use_temporary_cache_dir(): yield path +suppress_legacy_warning = pytest.mark.filterwarnings( + "ignore:.*legacy callback.*:DeprecationWarning" +) + + _load_callback = r""" -__device__ ${data_type} CB_ConvertInput( - void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +__device__ ${data_type} ${cb_name}( + void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr) { ${data_type} x = ((${data_type}*)dataIn)[offset]; ${element} *= 2.5; return x; } -__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +__device__ ${load_type} d_loadCallbackPtr = ${cb_name}; """ _load_callback_with_aux = r""" -__device__ ${data_type} CB_ConvertInput( - void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +__device__ ${data_type} ${cb_name}( + void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr) { ${data_type} x = ((${data_type}*)dataIn)[offset]; ${element} *= *((${aux_type}*)callerInfo); return x; } -__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +__device__ ${load_type} d_loadCallbackPtr = ${cb_name}; """ _load_callback_with_aux2 = r""" -__device__ ${data_type} CB_ConvertInput( - void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +__device__ ${data_type} ${cb_name}( + void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr) { ${data_type} x = ((${data_type}*)dataIn)[offset]; ${element} *= ((${aux_type}*)callerInfo)[offset]; return x; } -__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +__device__ ${load_type} d_loadCallbackPtr = ${cb_name}; """ _store_callback = r""" -__device__ void CB_ConvertOutput( - void *dataOut, size_t offset, ${data_type} element, +__device__ void ${cb_name}( + void *dataOut, ${offset_type} offset, ${data_type} element, void *callerInfo, void *sharedPointer) { ${data_type} x = element; @@ -67,12 +116,12 @@ def use_temporary_cache_dir(): ((${data_type}*)dataOut)[offset] = x; } -__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput; +__device__ ${store_type} d_storeCallbackPtr = ${cb_name}; """ _store_callback_with_aux = r""" -__device__ void CB_ConvertOutput( - void *dataOut, size_t offset, ${data_type} element, +__device__ void ${cb_name}( + void *dataOut, ${offset_type} offset, ${data_type} element, void *callerInfo, void *sharedPointer) { ${data_type} x = element; @@ -80,26 +129,209 @@ def use_temporary_cache_dir(): ((${data_type}*)dataOut)[offset] = x; } -__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput; +__device__ ${store_type} d_storeCallbackPtr = ${cb_name}; """ -def _set_load_cb(code, element, data_type, callback_type, aux_type=None): - return 
string.Template(code).substitute( +def _set_load_cb( + code, + element, + data_type, + callback_type, + callback_name, + aux_type=None, + cb_ver="", +): + if cb_ver == "jit": + callback_type = callback_type.replace( + "cufftCallback", "cufftJITCallback" + ) + callback = string.Template(code).substitute( data_type=data_type, aux_type=aux_type, load_type=callback_type, + cb_name=callback_name, element=element, + offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"), ) - - -def _set_store_cb(code, element, data_type, callback_type, aux_type=None): - return string.Template(code).substitute( + if cb_ver == "jit": + callback = "#include \n\n" + callback + return callback + + +def _set_store_cb( + code, + element, + data_type, + callback_type, + callback_name, + aux_type=None, + cb_ver="", +): + if cb_ver == "jit": + callback_type = callback_type.replace( + "cufftCallback", "cufftJITCallback" + ) + callback = string.Template(code).substitute( data_type=data_type, aux_type=aux_type, store_type=callback_type, + cb_name=callback_name, element=element, + offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"), ) + if cb_ver == "jit": + callback = "#include \n\n" + callback + return callback + + +# Note: this class is place here instead of at the end of this file, because +# pytest does not reset warnings internally, and other tests would suppress +# the warnings such that at the end we have no warnings to capture, but we want +# to ensure warnings are raised. +@pytest.mark.skipif( + cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" +) +class TestInputValidationWith1dCallbacks: + + shape = (10,) + norm = "ortho" + dtype = np.complex64 + + @classmethod + def setup_class(cls): + # All tests in this class use a temporary cache dir (also if threaded) + with use_temporary_cache_dir(): + yield + + def test_fft_load_legacy(self): + check_should_skip_legacy_test() + + fft = cupy.fft.fft + code = _load_callback + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + cb_load = _set_load_cb(code, *types, cb_ver="legacy") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + with pytest.deprecated_call( + match="legacy callback is considered deprecated" + ): + with cupy.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_ver="legacy" + ): + fft(a, norm=self.norm) + + def test_fft_load_jit_no_name(self): + check_should_skip_jit_test() + + fft = cupy.fft.fft + code = _load_callback + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + cb_load = _set_load_cb(code, *types, cb_ver="jit") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + # We omit passing cb_load_name. The test infra setup would check + # if we can infer it correctly. 
+ with cupy.fft.config.set_cufft_callbacks(cb_load=cb_load, cb_ver="jit"): + fft(a, norm=self.norm) + + def test_fft_store_legacy(self): + check_should_skip_legacy_test() + + fft = cupy.fft.fft + code = _store_callback + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + cb_store = _set_store_cb(code, *types, cb_ver="legacy") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + with pytest.deprecated_call( + match="legacy callback is considered deprecated" + ): + with cupy.fft.config.set_cufft_callbacks( + cb_store=cb_store, cb_ver="legacy" + ): + fft(a, norm=self.norm) + + def test_fft_store_jit_no_name(self): + check_should_skip_jit_test() + + fft = cupy.fft.fft + code = _store_callback + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + cb_store = _set_store_cb(code, *types, cb_ver="jit") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + # We omit passing cb_store_name. The test infra setup would check + # if we can infer it correctly. + with cupy.fft.config.set_cufft_callbacks( + cb_store=cb_store, cb_ver="jit" + ): + fft(a, norm=self.norm) + + def test_fft_load_store_legacy_aux(self): + check_should_skip_legacy_test() + + fft = cupy.fft.fft + dtype = self.dtype + load_code = _load_callback_with_aux + store_code = _store_callback_with_aux + load_aux = cupy.asarray(2.5, dtype=cupy.dtype(dtype).char.lower()) + store_aux = cupy.asarray(3.8, dtype=cupy.dtype(dtype).char.lower()) + + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + "float", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + "float", + ) + cb_load = _set_load_cb(load_code, *load_types, cb_ver="legacy") + cb_store = _set_store_cb(store_code, *store_types, cb_ver="legacy") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + with ( + pytest.deprecated_call( + match="cb_load_aux_arr or cb_store_aux_arr is deprecated" + ), + pytest.deprecated_call( + match="legacy callback is considered deprecated" + ), + ): + with cupy.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_store=cb_store, + cb_load_aux_arr=load_aux, + cb_store_aux_arr=store_aux, + cb_ver="legacy", + ): + fft(a, norm=self.norm) @testing.parameterize( @@ -108,31 +340,60 @@ def _set_store_cb(code, element, data_type, callback_type, aux_type=None): "n": [None, 5, 10, 15], "shape": [(10, 7), (10,), (10, 10)], "norm": [None, "ortho"], + "cb_ver": cb_ver_for_test, } ) ) -@testing.with_requires("cython>=0.29.0") -@pytest.mark.skipif( - not sys.platform.startswith("linux"), - reason="callbacks are only supported on Linux", -) @pytest.mark.skipif( cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" ) class Test1dCallbacks: + @classmethod + def setup_class(cls): + # All tests in this class use a temporary cache dir (also if threaded) + with use_temporary_cache_dir(): + yield + def _test_load_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + + # for simplicity we use the JIT callback names for both legacy/jit fft = getattr(xp.fft, fft_func) code = _load_callback if dtype == np.complex64: - types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) elif dtype == np.complex128: - types = ("x.x", "cufftDoubleComplex", 
"cufftCallbackLoadZ") + types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) elif dtype == np.float32: - types = ("x", "cufftReal", "cufftCallbackLoadR") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") - cb_load = _set_load_cb(code, *types) + types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + else: # float64 + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) + cb_load = _set_load_cb(code, *types, cb_ver=self.cb_ver) + cb_load_name = types[-1] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -144,50 +405,93 @@ def _test_load_helper(self, xp, dtype, fft_func): else: out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_load=cb_load): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "irfft") def _test_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) code = _store_callback + + # for simplicity we use the JIT callback names for both legacy/jit if dtype == np.complex64: if fft_func != "irfft": - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - types = ("x", "cufftReal", "cufftCallbackStoreR") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float32 for irfft + types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) elif dtype == np.complex128: if fft_func != "irfft": - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + else: # float64 for irfft + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) elif dtype == np.float32: - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) elif dtype == np.float64: - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - cb_store = _set_store_cb(code, *types) + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + cb_store = 
_set_store_cb(code, *types, cb_ver=self.cb_ver) + cb_store_name = types[-1] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -201,67 +505,134 @@ def _test_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_store=cb_store): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "irfft") def _test_load_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + + # for simplicity we use the JIT callback names for both legacy/jit fft = getattr(xp.fft, fft_func) load_code = _load_callback store_code = _store_callback if fft_func in ("fft", "ifft"): if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) elif fft_func == "rfft": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float64 + load_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) else: # irfft if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x", "cufftReal", "cufftCallbackStoreR") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") - store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") - cb_load = 
_set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) + store_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[-1] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[-1] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -276,47 +647,71 @@ def _test_load_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, cb_store=cb_store - ): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "irfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load_aux(self, xp, dtype): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = xp.fft.fft c = _load_callback_with_aux2 + # for simplicity we use the JIT callback names for both legacy/jit if dtype == np.complex64: - cb_load = _set_load_cb( - c, "x.x", "cufftComplex", "cufftCallbackLoadC", "float" + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + "float", ) - else: - cb_load = _set_load_cb( - c, "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", "double" + else: # complex128 + types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + "double", ) + cb_load = _set_load_cb(c, *types, cb_ver=self.cb_ver) + cb_load_name = types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) out_last = self.n if self.n is not None else self.shape[-1] @@ -333,15 +728,22 @@ def test_fft_load_aux(self, xp, dtype): if dtype in (np.float32, np.complex64): out = 
out.astype(np.complex64) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, cb_load_aux_arr=b - ): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_load_data=b.data, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out def _test_load_store_aux_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) load_code = _load_callback_with_aux store_code = _store_callback_with_aux @@ -349,53 +751,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower()) store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower()) + # for simplicity we use the JIT callback names for both legacy/jit if fft_func in ("fft", "ifft"): if dtype == np.complex64: load_types = ( "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) elif fft_func == "rfft": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + "float", + ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # float64 load_types = ( "x", "cufftDoubleReal", "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) else: # irfft @@ -404,24 +820,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) - store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float") - else: + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + "float", + ) + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x", "cufftDoubleReal", "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", "double", ) - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[3] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -436,32 +863,38 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, - cb_store=cb_store, - cb_load_aux_arr=load_aux, - cb_store_aux_arr=store_aux, - ): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + 
cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_load_data=load_aux.data, + cb_store_data=store_aux.data, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_load_store_aux(self, xp, dtype): @@ -469,38 +902,92 @@ def test_irfft_load_store_aux(self, xp, dtype): @testing.parameterize( - {"shape": (3, 4), "s": None, "axes": None, "norm": None}, - {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None}, - {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None}, - {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"}, - {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None}, - {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1), "norm": None}, - {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1), "norm": None}, - {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"}, - {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2), "norm": "ortho"}, -) -@testing.with_requires("cython>=0.29.0") -@pytest.mark.skipif( - not sys.platform.startswith("linux"), - reason="callbacks are only supported on Linux", + *( + testing.product_dict( + [ + {"shape": (3, 4), "s": None, "axes": None, "norm": None}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None}, + {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None}, + {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"}, + {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None}, + { + "shape": (2, 3, 4), + "s": (1, 4, 10), + "axes": (-3, -2, -1), + "norm": None, + }, + { + "shape": (2, 3, 4), + "s": None, + "axes": (-3, -2, -1), + "norm": None, + }, + {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"}, + { + "shape": (2, 3, 4), + "s": (2, 3), + "axes": (0, 1, 2), + "norm": "ortho", + }, + ], + testing.product( + { + "cb_ver": cb_ver_for_test, + }, + ), + ) + ) ) @pytest.mark.skipif( cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" ) class TestNdCallbacks: + @classmethod + def setup_class(cls): + # All tests in this class use a temporary cache dir (also if threaded) + with use_temporary_cache_dir(): + yield + def _test_load_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + + # for simplicity we use the JIT callback names for both legacy/jit fft = getattr(xp.fft, fft_func) load_code = _load_callback if dtype == np.complex64: - types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) elif dtype == np.complex128: - types = ("x.x", "cufftDoubleComplex", 
"cufftCallbackLoadZ") + types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) elif dtype == np.float32: - types = ("x", "cufftReal", "cufftCallbackLoadR") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") - cb_load = _set_load_cb(load_code, *types) + types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + else: # float64 + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) + cb_load = _set_load_cb(load_code, *types, cb_ver=self.cb_ver) + cb_load_name = types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -512,12 +999,14 @@ def _test_load_helper(self, xp, dtype, fft_func): else: out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_load=cb_load): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -525,6 +1014,7 @@ def _test_load_helper(self, xp, dtype, fft_func): def test_fftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -532,6 +1022,7 @@ def test_fftn_load(self, xp, dtype): def test_ifftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -539,6 +1030,7 @@ def test_ifftn_load(self, xp, dtype): def test_rfftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -547,23 +1039,61 @@ def test_irfftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "irfftn") def _test_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) store_code = _store_callback + + # for simplicity we use the JIT callback names for both legacy/jit if dtype == np.complex64: if fft_func != "irfftn": - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - types = ("x", "cufftReal", "cufftCallbackStoreR") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float32 for irfftn + types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) elif dtype == np.complex128: if fft_func != "irfftn": - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + else: # float64 for irfftn + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) elif dtype == np.float32: - types = ("x.y", "cufftComplex", 
"cufftCallbackStoreC") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) elif dtype == np.float64: - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - cb_store = _set_store_cb(store_code, *types) + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + cb_store = _set_store_cb(store_code, *types, cb_ver=self.cb_ver) + cb_store_name = types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -577,12 +1107,16 @@ def _test_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_store=cb_store): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -590,6 +1124,7 @@ def _test_store_helper(self, xp, dtype, fft_func): def test_fftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -597,6 +1132,7 @@ def test_fftn_store(self, xp, dtype): def test_ifftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -604,6 +1140,7 @@ def test_ifftn_store(self, xp, dtype): def test_rfftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -612,40 +1149,101 @@ def test_irfftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "irfftn") def _test_load_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) load_code = _load_callback store_code = _store_callback + + # for simplicity we use the JIT callback names for both legacy/jit if fft_func in ("fftn", "ifftn"): if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) elif fft_func == "rfftn": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + 
load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float64 + load_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) else: # irfft if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x", "cufftReal", "cufftCallbackStoreR") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") - store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) + store_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[3] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -660,14 +1258,18 @@ def _test_load_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, cb_store=cb_store - ): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -675,6 +1277,7 @@ def _test_load_store_helper(self, xp, dtype, fft_func): def test_fftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -682,6 +1285,7 @@ def test_fftn_load_store(self, xp, dtype): def test_ifftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -689,6 +1293,7 @@ def test_ifftn_load_store(self, xp, dtype): def test_rfftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -697,6 +1302,11 @@ def test_irfftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "irfftn") def _test_load_store_aux_helper(self, xp, dtype, fft_func): + if self.cb_ver == 
"legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) load_code = _load_callback_with_aux store_code = _store_callback_with_aux @@ -704,53 +1314,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower()) store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower()) + # for simplicity we use the JIT callback names for both legacy/jit if fft_func in ("fftn", "ifftn"): if dtype == np.complex64: load_types = ( "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) elif fft_func == "rfftn": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + "float", + ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # float64 load_types = ( "x", "cufftDoubleReal", "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) else: # irfftn @@ -759,24 +1383,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) - store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float") - else: + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + "float", + ) + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x", "cufftDoubleReal", "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", "double", ) - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[3] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -791,17 +1426,20 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, - cb_store=cb_store, - cb_load_aux_arr=load_aux, - cb_store_aux_arr=store_aux, - ): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_load_data=load_aux.data, + cb_store_data=store_aux.data, + cb_ver=self.cb_ver, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -809,6 +1447,7 @@ def 
_test_load_store_aux_helper(self, xp, dtype, fft_func): def test_fftn_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -816,6 +1455,7 @@ def test_fftn_load_store_aux(self, xp, dtype): def test_ifftn_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -823,6 +1463,7 @@ def test_ifftn_load_store_aux(self, xp, dtype): def test_rfftn_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py index 534b474363f1..369409ba001c 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import warnings @@ -6,6 +8,14 @@ import dpnp as cupy from dpnp.tests.helper import has_support_aspect64 + +# from cupy.fft import config +# from cupy.fft._fft import ( +# _default_fft_func, +# _fft, +# _fftn, +# _size_last_transform_axis, +# ) from dpnp.tests.third_party.cupy import testing from dpnp.tests.third_party.cupy.testing._loops import _wraps_partial @@ -36,12 +46,16 @@ def decorator(impl): @_wraps_partial(impl, name) def test_func(self, *args, **kw): # get original global planning state - # planning_state = config.enable_nd_planning + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # planning_state = config.enable_nd_planning try: for nd_planning in states: try: # enable or disable nd planning - # config.enable_nd_planning = nd_planning + # with pytest.warns( + # DeprecationWarning, match="enable_nd_planning" + # ): + # config.enable_nd_planning = nd_planning kw[name] = nd_planning impl(self, *args, **kw) @@ -50,7 +64,10 @@ def test_func(self, *args, **kw): raise finally: # restore original global planning state - # config.enable_nd_planning = planning_state + # with pytest.warns( + # DeprecationWarning, match="enable_nd_planning" + # ): + # config.enable_nd_planning = planning_state pass return test_func @@ -71,8 +88,8 @@ def multi_gpu_config(gpu_configs=None): def decorator(impl): @functools.wraps(impl) def test_func(self, *args, **kw): - use_multi_gpus = config.use_multi_gpus - _devices = config._devices + use_multi_gpus = config._use_multi_gpus.get() + _devices = config._devices.get() try: for gpus in gpu_configs: @@ -81,23 +98,21 @@ def test_func(self, *args, **kw): assert nGPUs >= 2, "Must use at least two gpus" config.use_multi_gpus = True config.set_cufft_gpus(gpus) - self.gpus = gpus impl(self, *args, **kw) except Exception: print("GPU config is:", gpus) raise finally: - config.use_multi_gpus = use_multi_gpus - config._devices = _devices - del self.gpus + config._use_multi_gpus.set(use_multi_gpus) + config._devices.set(_devices) return test_func return decorator -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *testing.product( @@ -138,7 
+153,7 @@ def test_ifft(self, xp, dtype): return xp.fft.ifft(a, n=self.n, norm=self.norm) -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( *testing.product( { @@ -179,19 +194,6 @@ def test_ifft(self, xp, dtype): return xp.fft.ifft(a, axis=self.axis) -# See #3757 and NVIDIA internal ticket 3093094 -def _skip_multi_gpu_bug(shape, gpus): - # avoid CUDA 11.0 (will be fixed by CUDA 11.2) bug triggered by - # - batch = 1 - # - gpus = [1, 0] - if ( - 11000 <= cupy.cuda.runtime.runtimeGetVersion() < 11020 - and len(shape) == 1 - and gpus == [1, 0] - ): - pytest.skip("avoid CUDA 11 bug") - - # Almost identical to the TestFft class, except that # 1. multi-GPU cuFFT is used # 2. the tested parameter combinations are adjusted to meet the requirements @@ -208,6 +210,9 @@ def _skip_multi_gpu_bug(shape, gpus): ) @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(2) +# @pytest.mark.skipif( +# cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT" +# ) class TestMultiGpuFft: @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) @@ -216,8 +221,6 @@ class TestMultiGpuFft: rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) return xp.fft.fft(a, n=self.n, norm=self.norm) @@ -230,8 +233,6 @@ def test_fft(self, xp, dtype): @testing.with_requires("numpy!=1.17.0") @testing.with_requires("numpy!=1.17.1") def test_ifft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) return xp.fft.ifft(a, n=self.n, norm=self.norm) @@ -251,6 +252,9 @@ def test_ifft(self, xp, dtype): ) @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(2) +# @pytest.mark.skipif( +# cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT" +# ) class TestMultiGpuFftOrder: @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) @testing.for_complex_dtypes() @@ -258,8 +262,6 @@ class TestMultiGpuFftOrder: rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if self.data_order == "F": a = xp.asfortranarray(a) @@ -271,8 +273,6 @@ def test_fft(self, xp, dtype): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_ifft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if self.data_order == "F": a = xp.asfortranarray(a) @@ -348,9 +348,13 @@ def test_default_fft_func(self, enable_nd): @pytest.mark.skip("memory management is not supported") @testing.with_requires("numpy>=2.0") +# @pytest.mark.skipif( +# 10010 <= cupy.cuda.runtime.runtimeGetVersion() <= 11010, +# reason="avoid a cuFFT bug (cupy/cupy#3777)", +# ) @testing.slow class TestFftAllocate: - + # @pytest.mark.thread_unsafe(reason="does large allocations") def test_fft_allocate(self): # Check CuFFTError is not raised when the GPU memory is enough. 
# See https://github.com/cupy/cupy/issues/1063 @@ -368,7 +372,7 @@ def test_fft_allocate(self): cupy.fft.config.clear_plan_cache() -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( @@ -413,7 +417,8 @@ class TestFft2: type_check=has_support_aspect64(), ) def test_fft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -440,7 +445,8 @@ def test_fft2(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_ifft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -457,7 +463,7 @@ def test_ifft2(self, xp, dtype, order, enable_nd): return out -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( @@ -503,7 +509,8 @@ class TestFftn: type_check=has_support_aspect64(), ) def test_fftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -530,7 +537,8 @@ def test_fftn(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_ifftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -595,7 +603,8 @@ def skip_buggy(self): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -613,7 +622,8 @@ def test_fftn(self, xp, dtype, enable_nd): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_ifftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -635,7 +645,8 @@ def test_fftn_error_on_wrong_plan(self, dtype, enable_nd): from cupy.fft import fftn from cupyx.scipy.fftpack import get_fft_plan - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd # can't get a plan, so skip if self.axes is not None: @@ -747,6 +758,9 @@ def test_fft_error_on_wrong_plan(self, dtype): ) @pytest.mark.skip("get_fft_plan() is not supported") @testing.multi_gpu(2) +# @pytest.mark.skipif( +# cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT" +# ) class 
TestMultiGpuPlanCtxManagerFft: @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) @@ -755,8 +769,6 @@ class TestMultiGpuPlanCtxManagerFft: rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -776,8 +788,6 @@ def test_fft(self, xp, dtype): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_ifft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -1029,7 +1039,8 @@ class TestRfft2: type_check=has_support_aspect64(), ) def test_rfft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -1046,14 +1057,21 @@ def test_rfft2(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_irfft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd + # if ( + # 10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010 + # and int(cupy.cuda.device.get_compute_capability()) < 70 + # and _size_last_transform_axis(self.shape, self.s, self.axes) == 2 + # ): + # pytest.skip("work-around for cuFFT issue") a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) return xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm) -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (3, 4), "s": None, "axes": (), "norm": None}, {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None}, @@ -1115,7 +1133,8 @@ class TestRfftn: type_check=has_support_aspect64(), ) def test_rfftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -1132,7 +1151,14 @@ def test_rfftn(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_irfftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd + # if ( + # 10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010 + # and int(cupy.cuda.device.get_compute_capability()) < 70 + # and _size_last_transform_axis(self.shape, self.s, self.axes) == 2 + # ): + # pytest.skip("work-around for cuFFT issue") a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -1182,7 +1208,8 @@ def skip_buggy(self): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_rfftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -1194,13 +1221,17 @@ def test_rfftn(self, xp, dtype, enable_nd): with plan: return xp.fft.rfftn(a, s=self.s, axes=self.axes, 
norm=self.norm)

+    # @pytest.mark.skipif(
+    #     cupy.cuda.runtime.is_hip, reason="hipFFT's PlanNd for C2R is buggy"
+    # )
     @nd_planning_states()
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_irfftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
             return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm)
@@ -1282,7 +1313,7 @@ def test_ifftn_orders(self, dtype, enable_nd):
             pass


-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (3, 4), "s": None, "axes": (), "norm": None},
     {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None},
@@ -1343,7 +1374,7 @@ def test_ihfft(self, xp, dtype):
         return xp.fft.ihfft(a, n=self.n, norm=self.norm)


-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"n": 1, "d": 1},
     {"n": 10, "d": 0.5},
@@ -1372,7 +1403,7 @@ def test_rfftfreq(self, xp, dtype):
         return xp.fft.rfftfreq(self.n, self.d)


-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (5,), "axes": None},
     {"shape": (5,), "axes": 0},

From 8f8a1f9ef2c712e79a316542f8b59e4535ea0e85 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 19 Jan 2026 07:45:16 -0800
Subject: [PATCH 3/3] Update tests for random.choice

---
 .../cupy/random_tests/test_generator.py | 160 ++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
index 9d1a3d233473..abb58df07af9 100644
--- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py
+++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import os
 import threading
@@ -850,6 +852,7 @@ def test_goodness_of_fit(self):
         assert _hypothesis.chi_square_test(counts, expected)

     @_condition.repeat(3, 10)
+    # @pytest.mark.xfail(runtime.is_hip, reason="ROCm/HIP may have a bug")
     def test_goodness_of_fit_2(self):
         vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get()
         counts = numpy.histogram(vals, bins=numpy.arange(4))[0]
@@ -929,6 +932,163 @@ def test_bound(self):
         assert numpy.unique(val).size == val.size

+
+@testing.parameterize(
+    # Edge cases with small domain sizes
+    {"a": 0, "size": 0},
+    {"a": 1, "size": 1},
+    {"a": 2, "size": 1},
+    {"a": 256, "size": 100},  # Minimum cipher bits threshold
+    {"a": 257, "size": 100},
+    # Large-scale uniqueness
+    {"a": 100, "size": 50},
+    {"a": 1000, "size": 500},
+    {"a": 10000, "size": 5000},
+    {"a": 100000, "size": 50000},
+    # Full input permutation
+    {"a": 10, "size": 10},
+    {"a": 100, "size": 100},
+    {"a": 1000, "size": 1000},
+    # Power of 2
+    {"a": 2**8, "size": 100},
+    {"a": 2**10, "size": 500},
+    {"a": 2**16, "size": 1000},
+    {"a": 2**20, "size": 5000},
+    {"a": 2**24, "size": 10000},
+    # Just below power of 2
+    {"a": 2**8 - 1, "size": 100},
+    {"a": 2**16 - 1, "size": 1000},
+    {"a": 2**20 - 1, "size": 5000},
+    # Just above power of 2
+    {"a": 2**8 + 1, "size": 100},
+    {"a": 2**16 + 1, "size": 1000},
+    {"a": 2**20 + 1, "size": 5000},
+    # Test multi-dimensional shapes.
+ {"a": 6, "size": (2, 3)}, + {"a": 32, "size": (4, 5)}, + {"a": 120, "size": (5, 4, 5)}, +) +@testing.fix_random() +class TestChoiceReplaceFalseLargeScale(RandomGeneratorTestCase): + """Test large-scale uniqueness for Feistel bijection implementation.""" + + target_method = "choice" + + def test_uniqueness_and_bounds(self): + """Test that samples have no duplicates and correct bounds.""" + val = self.generate(a=self.a, size=self.size, replace=False).get() + size = self.size if isinstance(self.size, tuple) else (self.size,) + + # Check shape + assert val.shape == size + + # Check bounds + assert (0 <= val).all() + assert (val < self.a).all() + + # Check uniqueness + val_flat = numpy.asarray(val).flatten() + assert ( + numpy.unique(val_flat).size == val_flat.size + ), "Found duplicate values in replace=False sample" + + +@testing.fix_random() +class TestChoiceReplaceFalseStatistical(RandomGeneratorTestCase): + """Statistical tests for uniformity of Feistel bijection.""" + + target_method = "choice" + + @_condition.repeat(3) + def test_small_domain_uniformity(self): + """Chi-square test for uniform sampling in small domain.""" + # Sample from domain of size 10, taking 5 elements + # Repeat many times and check each index appears uniformly + n = 10 + sample_size = 5 + n_trials = 1000 + + counts = cupy.zeros(n, dtype=int) + vals = self.generate_many( + n, size=sample_size, replace=False, _count=n_trials + ) + for val in vals: + counts[val] += 1 + counts = counts.get() + + # Each index should appear ~500 times (5/10 * 1000) + expected = numpy.ones(n, dtype=int) * (sample_size * n_trials // n) + assert _hypothesis.chi_square_test(counts, expected) + + @_condition.repeat(3, 10) + def test_permutation_variability(self): + """Test that repeated full permutations are different.""" + n = 20 + n_trials = 10 + + vals = self.generate_many(n, size=n, replace=False, _count=n_trials) + perms = cupy.vstack(vals) + + # Should have multiple unique permutations + unique_perms = cupy.unique(perms, axis=0) + assert ( + len(unique_perms) == n_trials + ), "Permutations should vary across multiple calls" + + +@testing.slow +@testing.fix_random() +class TestChoiceReplaceFalseVeryLargeDomain(unittest.TestCase): + """Test memory efficiency with very large domains.""" + + def setUp(self): + self.rs = _generator.RandomState(seed=testing.generate_seed()) + + def test_large_domain_memory_efficiency(self): + """Test that very large domains don't allocate full arrays.""" + # This should NOT allocate a 2^30 element array + # If it did, it would require ~8GB of memory + a = 2**30 + size = 1000 + + val = self.rs.choice(a=a, size=size, replace=False).get() + + # Check bounds + assert (0 <= val).all() + assert (val < a).all() + + # Check uniqueness + assert numpy.unique(val).size == size + + def test_near_32bit_limit(self): + """Test at the 32-bit boundary.""" + # Current implementation supports up to 2^32 + a = 2**31 + size = 500 + + val = self.rs.choice(a=a, size=size, replace=False).get() + + # Check bounds + assert (0 <= val).all() + assert (val < a).all() + + # Check uniqueness + assert numpy.unique(val).size == size + + +@testing.fix_random() +class TestChoiceReplaceFalseDtypeConsistency(RandomGeneratorTestCase): + """Test output dtype consistency.""" + + target_method = "choice" + + def test_integer_input_dtype(self): + """Integer input should produce int64/long dtype.""" + val = self.generate(a=100, size=50, replace=False) + + # Should be 'l' (long) dtype, which is int64 on most platforms + assert val.dtype == 
numpy.dtype("l") or val.dtype == numpy.int64 + + @testing.fix_random() class TestGumbel(RandomGeneratorTestCase):
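# A minimal, self-contained sketch of the technique the TestChoiceReplaceFalse*
# classes above refer to: a "Feistel bijection" lets choice(a, size,
# replace=False) draw distinct indices from a huge domain (e.g. a = 2**30)
# without materializing range(a). Everything below is illustrative only -- the
# helper names, the 4-round count, and the multiplicative-hash round function
# are assumptions for the sketch, not the actual dpnp/CuPy kernel.
import numpy as np


def _feistel_permute(x, keys, half_bits):
    # Balanced Feistel network over 2 * half_bits bits; the construction is a
    # bijection on [0, 2**(2 * half_bits)) regardless of the round function.
    mask = (1 << half_bits) - 1
    left, right = x >> half_bits, x & mask
    for k in keys:
        left, right = right, left ^ (((right * 0x9E3779B1) ^ k) & mask)
    return (left << half_bits) | right


def unique_choice_sketch(a, size, seed=0):
    # Return `size` distinct integers from range(a) using O(size) memory.
    assert 0 <= size <= a
    keys = [int(k) for k in np.random.default_rng(seed).integers(0, 2**32, 4)]
    # Smallest balanced Feistel domain 2**(2 * half_bits) that covers [0, a).
    half_bits = max(1, ((max(a, 2) - 1).bit_length() + 1) // 2)
    out = []
    for i in range(size):
        x = _feistel_permute(i, keys, half_bits)
        while x >= a:  # cycle-walking keeps the mapping a bijection on [0, a)
            x = _feistel_permute(x, keys, half_bits)
        out.append(x)
    return np.asarray(out, dtype=np.int64)


# Example: 1000 distinct samples from a 2**30-sized domain, with no need for
# the ~8 GB buffer a full int64 permutation of range(2**30) would require.
# assert np.unique(unique_choice_sketch(2**30, 1000)).size == 1000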