From 71b8e58135e2835715062b594028cfb286fb6039 Mon Sep 17 00:00:00 2001 From: Gabriele Cimador Date: Fri, 19 Dec 2025 10:59:27 +0100 Subject: [PATCH] GPU Framework: remove GPUDefParametersDefaults.h and automatically generate GPU parameters using json file and CMake --- GPU/GPUTracking/CMakeLists.txt | 22 +- .../Definitions/GPUDefParametersDefaults.h | 589 ------------------ .../Definitions/GPUParameters.json | 582 +++++++++++++++++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../cmake/generateGPUParamHeader.cmake | 56 ++ .../cmake/gpu_param_header_generator.cmake | 97 +++ GPU/documentation/build-O2.md | 2 +- 7 files changed, 751 insertions(+), 599 deletions(-) delete mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h create mode 100644 GPU/GPUTracking/Definitions/GPUParameters.json create mode 100644 GPU/GPUTracking/cmake/generateGPUParamHeader.cmake create mode 100644 GPU/GPUTracking/cmake/gpu_param_header_generator.cmake diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 6a60eb9edd6d0..ab5ef8f9bd0c9 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -106,6 +106,12 @@ set(SRCS_NO_H SectorTracker/GPUTPCTrackerDump.cxx Global/GPUChainTrackingDebugAndProfiling.cxx Global/GPUChainTrackingIO.cxx) +set(ON_THE_FLY_DIR ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) +file(MAKE_DIRECTORY ${ON_THE_FLY_DIR}) +include(cmake/generateGPUParamHeader.cmake) +set(GPU_DEFAULT_PARAMS_HEADER ${ON_THE_FLY_DIR}/GPUDefParametersDefaults.h) +generate_gpu_param_header("AUTO" ${GPU_DEFAULT_PARAMS_HEADER}) # generate header with default GPU parameters, arch selected by CMake variables + set(HDRS_INSTALL ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} @@ -134,9 +140,9 @@ set(HDRS_INSTALL DataTypes/GPUO2ExternalUser.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h + ${GPU_DEFAULT_PARAMS_HEADER} Definitions/GPUDefParametersWrapper.h Definitions/GPUDefParametersConstants.h - 
Definitions/GPUDefParametersDefaults.h Definitions/GPUDef.h Definitions/GPUDefMacros.h Definitions/GPULogging.h @@ -238,8 +244,6 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h Definitions/GPUDefParametersLoad.template.inc) set(GENERATED_HEADERS_LIST "") -set(ON_THE_FLY_DIR ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) -file(MAKE_DIRECTORY ${ON_THE_FLY_DIR}) foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) get_filename_component(OUTPUT_FILE_NAME ${TEMPLATE_FILE} NAME) string(REPLACE ".template" "" OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}) @@ -285,6 +289,7 @@ set(HDRS_CINT_DATATYPES ${HDRS_CINT_DATATYPES} ${HDRS_TMP}) unset(HDRS_TMP) set(INCDIRS + ${ON_THE_FLY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/Definitions ${CMAKE_CURRENT_SOURCE_DIR}/DataTypes @@ -301,14 +306,14 @@ set(INCDIRS ${CMAKE_CURRENT_SOURCE_DIR}/Refit ${CMAKE_CURRENT_SOURCE_DIR}/Debug ${CMAKE_CURRENT_SOURCE_DIR}/DataCompression - ${CMAKE_CURRENT_SOURCE_DIR}/TPCClusterFinder - ${ON_THE_FLY_DIR}) + ${CMAKE_CURRENT_SOURCE_DIR}/TPCClusterFinder) # Main CMake part for O2 if(ALIGPU_BUILD_TYPE STREQUAL "O2") o2_add_library(GPUDataTypes TARGETVARNAME targetName PUBLIC_INCLUDE_DIRECTORIES . 
+ ${ON_THE_FLY_DIR} Definitions DataTypes PUBLIC_LINK_LIBRARIES O2::GPUUtils @@ -408,16 +413,19 @@ set(GPU_CONST_PARAM_ARCHITECTUES AMPERE TURING VEGA MI100) set(GPU_CONST_PARAM_FILES "") foreach(GPU_ARCH ${GPU_CONST_PARAM_ARCHITECTUES}) set(PARAMFILE ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch/gpu_const_param_${GPU_ARCH}.par) + set(GPU_ARCH_PARAMS_HEADER ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch/GPUDefParametersDefaults_${GPU_ARCH}.h) + generate_gpu_param_header(${GPU_ARCH} ${GPU_ARCH_PARAMS_HEADER}) add_custom_command( OUTPUT ${PARAMFILE} COMMAND bash -c - "echo -e '#define GPUCA_GPUTYPE_${GPU_ARCH}\\n#define PARAMETER_FILE \"GPUDefParametersDefaults.h\"\\ngInterpreter->AddIncludePath(\"${CMAKE_CURRENT_SOURCE_DIR}/Definitions\");\\ngInterpreter->AddIncludePath(\"${ON_THE_FLY_DIR}\");\\n.x ${CMAKE_CURRENT_SOURCE_DIR}/Standalone/tools/dumpGPUDefParam.C(\"${PARAMFILE}\")\\n.q\\n'" + "echo -e '#define GPUCA_GPUTYPE_${GPU_ARCH}\\n#define PARAMETER_FILE \"${GPU_ARCH_PARAMS_HEADER}\"\\ngInterpreter->AddIncludePath(\"${CMAKE_CURRENT_SOURCE_DIR}/Definitions\");\\ngInterpreter->AddIncludePath(\"${ON_THE_FLY_DIR}\");\\n.x ${CMAKE_CURRENT_SOURCE_DIR}/Standalone/tools/dumpGPUDefParam.C(\"${PARAMFILE}\")\\n.q\\n'" | root -l -b > /dev/null VERBATIM WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch MAIN_DEPENDENCY Standalone/tools/dumpGPUDefParam.C - DEPENDS Definitions/GPUDefParametersDefaults.h - ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h + # The generated per-architecture header is the PARAMETER_FILE input of this command, so the .par file must be rebuilt whenever it changes + DEPENDS ${GPU_ARCH_PARAMS_HEADER} + ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h ${ON_THE_FLY_DIR}/GPUDefParametersLoad.inc COMMENT "Generating GPU parameter set for architecture ${GPU_ARCH}") LIST(APPEND GPU_CONST_PARAM_FILES ${PARAMFILE}) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h deleted file mode 100644 index 01ae33dc3b4d8..0000000000000 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ /dev/null @@ -1,589 +0,0 @@ -// Copyright 2019-2020 CERN and copyright 
holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUDefParametersDefaults.h -/// \author David Rohr - -// This file contains compile-time constants affecting the GPU performance. - -#if !defined(GPUDEFPARAMETERSDEFAULTS_H) -#define GPUDEFPARAMETERSDEFAULTS_H -// clang-format off - -// Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) - -// GPU Run Configuration -#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
- // GPU-architecture-dependent default settings - #if defined(GPUCA_GPUTYPE_MI100) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_PAR_AMD_EUS_PER_CU 4 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 256, 7 - #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 5 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024, 2 - #define GPUCA_LB_GPUTPCTrackletConstructor 768, 8 - #define GPUCA_LB_GPUTPCTrackletSelector 384, 5 - #define GPUCA_LB_GPUTPCNeighboursFinder 192, 8 - #define GPUCA_LB_GPUTPCNeighboursCleaner 128, 5 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256, 7 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE, 4 - #define GPUCA_LB_GPUTPCCFGather 1024, 5 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 192, 2 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 5 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 4 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 768, 1 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 - #define 
GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128, 1 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64, 10 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512, 9 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512, 5 - #define GPUCA_LB_GPUTPCCFClusterizer 448, 3 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 10 - #define PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 4 - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 9 - #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 - #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 - #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_PAR_COMP_GATHER_KERNEL 4 - #define GPUCA_PAR_COMP_GATHER_MODE 3 - #elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_PAR_AMD_EUS_PER_CU 4 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 192, 2 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 7 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024, 7 - #define GPUCA_LB_GPUTPCTrackletConstructor 512, 10 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 10 - #define GPUCA_LB_GPUTPCNeighboursFinder 960, 8 - #define GPUCA_LB_GPUTPCNeighboursCleaner 384, 9 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256, 2 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 1 - #define 
GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE, 14 - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 7 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256, 2 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64, 2 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define 
GPUCA_LB_GPUTPCCFPeakFinder 512, 4 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512, 5 - #define GPUCA_LB_GPUTPCCFClusterizer 512, 2 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 2 - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 27 - #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 - #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 - #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_PAR_COMP_GATHER_KERNEL 4 - #define GPUCA_PAR_COMP_GATHER_MODE 3 - #elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 384 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - 
#define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 - #define GPUCA_LB_GPUTPCCFPeakFinder 128 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 448 - #define GPUCA_LB_GPUTPCCFDeconvolution 384 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 - #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 - #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_PAR_COMP_GATHER_KERNEL 4 - #define GPUCA_PAR_COMP_GATHER_MODE 3 - #elif 
defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define 
GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 - #define GPUCA_PAR_SORT_BEFORE_FIT 1 - #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 - #define GPUCA_PAR_COMP_GATHER_KERNEL 4 - #define GPUCA_PAR_COMP_GATHER_MODE 3 - #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half - #elif defined(GPUCA_GPUTYPE_OPENCL) - #else - #error GPU TYPE NOT SET - #endif - - // Default settings for GPU, if not already set for selected GPU type - #ifndef GPUCA_WARP_SIZE - #define GPUCA_WARP_SIZE 32 - #endif - #ifndef GPUCA_PAR_AMD_EUS_PER_CU - #define GPUCA_PAR_AMD_EUS_PER_CU 0 - #endif - #ifndef GPUCA_THREAD_COUNT_DEFAULT - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #endif - #ifndef GPUCA_LB_GPUTPCCreateTrackingData - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #endif - #ifndef GPUCA_LB_GPUTPCTrackletConstructor - #define GPUCA_LB_GPUTPCTrackletConstructor 256 - #endif - #ifndef GPUCA_LB_GPUTPCTrackletSelector - #define GPUCA_LB_GPUTPCTrackletSelector 256 - #endif - #ifndef GPUCA_LB_GPUTPCNeighboursFinder - #define GPUCA_LB_GPUTPCNeighboursFinder 256 - #endif - #ifndef GPUCA_LB_GPUTPCNeighboursCleaner - #define GPUCA_LB_GPUTPCNeighboursCleaner 256 - #endif - #ifndef GPUCA_LB_GPUTPCExtrapolationTracking - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #endif - #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion - #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 512 - #endif - #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fill - #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 256 - #endif - #ifndef 
GPUCA_LB_GPUTPCCreateOccupancyMap_fold - #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 256 - #endif - #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version - #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 512 - #endif - #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 256 - #endif - #ifndef GPUCA_LB_GPUTPCCompressionKernels_step1unattached - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 256 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step0attached - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 256 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step1unattached - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 256 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow - #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters - #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256 - #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters - #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256 - #endif - #ifndef GPUCA_LB_GPUTPCCFDecodeZS - #define GPUCA_LB_GPUTPCCFDecodeZS 128, 4 - #endif - #ifndef GPUCA_LB_GPUTPCCFDecodeZSLink - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #endif - #ifndef GPUCA_LB_GPUTPCCFDecodeZSDenseLink - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #endif - #ifndef GPUCA_LB_GPUTPCCFGather - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #endif - #ifndef GPUCA_LB_COMPRESSION_GATHER - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerTrackFit - #define GPUCA_LB_GPUTPCGMMergerTrackFit 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #endif - 
#ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerUnpackGlobal - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step0 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step1 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step2 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step3 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step4 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerClearLinks - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerCollect - #define GPUCA_LB_GPUTPCGMMergerCollect 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksPrepare - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step0 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step1 256 - #endif - #ifndef 
GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 - #define GPUCA_LB_GPUTPCGMMergerPrepareForFit_step2 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0 - #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step1 - #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step2 - #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 - #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 - #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 - #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMO2Output_prepare - #define GPUCA_LB_GPUTPCGMO2Output_prepare 256 - #endif - #ifndef GPUCA_LB_GPUTPCGMO2Output_output - #define GPUCA_LB_GPUTPCGMO2Output_output 256 - #endif - #ifndef GPUCA_LB_GPUTPCStartHitsFinder - #define GPUCA_LB_GPUTPCStartHitsFinder 256 - #endif - #ifndef GPUCA_LB_GPUTPCStartHitsSorter - #define GPUCA_LB_GPUTPCStartHitsSorter 256 - #endif - #ifndef GPUCA_LB_GPUTPCCFCheckPadBaseline - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #endif - #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #endif - #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #endif - #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #endif - #ifndef GPUCA_LB_GPUTPCCFPeakFinder - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #endif - #ifndef GPUCA_LB_GPUTPCCFNoiseSuppression - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #endif - #ifndef GPUCA_LB_GPUTPCCFDeconvolution - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #endif - #ifndef GPUCA_LB_GPUTPCCFClusterizer - #define 
GPUCA_LB_GPUTPCCFClusterizer 512 - #endif - #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels 512 - #endif - #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU - #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256 - #endif - #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov - #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256 - #endif - #ifndef GPUCA_LB_GPUMemClean16 - #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT_DEFAULT, 1 - #endif - #ifndef GPUCA_LB_GPUitoa - #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT_DEFAULT, 1 - #endif - // These kernel launch-bounds are derrived from one of the constants set above - #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression - #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression - - #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNNCPU GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNNGPU 1024 - #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishDeconvolutionFlags GPUCA_LB_GPUTPCNNClusterizerKernels - - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown 
GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER - #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER - #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER - #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER - #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER - - // Defaults for non-LB parameters - #ifndef GPUCA_PAR_SORT_STARTHITS - #define GPUCA_PAR_SORT_STARTHITS 1 - #endif - #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 - #endif - #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 4 - #endif - #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 1 - #endif - #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 12 - #endif - #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT - #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_PAR_SORT_BEFORE_FIT - #define GPUCA_PAR_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_PAR_COMP_GATHER_KERNEL - #define GPUCA_PAR_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_PAR_COMP_GATHER_MODE - #define GPUCA_PAR_COMP_GATHER_MODE 2 - #endif - #ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 512 - #endif -#endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) - -#ifndef GPUCA_GPUCODE_GENRTC - // Defaults (also for CPU) for non-LB parameters - #ifndef GPUCA_PAR_SORT_STARTHITS - #define GPUCA_PAR_SORT_STARTHITS 0 - #endif - #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 - #endif - #ifndef 
GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 0 - #endif - #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED - #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 - #endif - #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #endif - #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT - #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_PAR_SORT_BEFORE_FIT - #define GPUCA_PAR_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_PAR_COMP_GATHER_KERNEL - #define GPUCA_PAR_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_PAR_COMP_GATHER_MODE - #define GPUCA_PAR_COMP_GATHER_MODE 0 - #endif - #ifndef GPUCA_PAR_NO_ATOMIC_PRECHECK - #define GPUCA_PAR_NO_ATOMIC_PRECHECK 0 - #endif - #ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE - #define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 0 - #endif - #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE - #define GPUCA_PAR_DEDX_STORAGE_TYPE float - #endif - #ifndef GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE - #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE float - #endif -#endif // GPUCA_GPUCODE_GENRTC - -// clang-format on -#endif // GPUDEFPARAMETERSDEFAULTS_H diff --git a/GPU/GPUTracking/Definitions/GPUParameters.json b/GPU/GPUTracking/Definitions/GPUParameters.json new file mode 100644 index 0000000000000..e8f1c24520813 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUParameters.json @@ -0,0 +1,582 @@ +{ + "CORE": { + "WARP_SIZE": { + "default": 32, + "MI100": 64, + "VEGA": 64, + "AMPERE": 32, + "TURING": 32 + }, + "THREAD_COUNT_DEFAULT": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 512, + "TURING": 512 + } + }, + "LB": { + "GPUTPCCreateTrackingData": { + "default": 256, + "MI100": [256, 7], + "VEGA": [192, 2], + "AMPERE": 384, + "TURING": 256 + }, + "GPUTPCTrackletConstructor": { + "default": 256, + "MI100": [768, 8], + "VEGA": [512, 10], + "AMPERE": [256, 2], + "TURING": [256, 2] + }, + "GPUTPCTrackletSelector": { + "default": 256, + "MI100": 
[384, 5], + "VEGA": [192, 10], + "AMPERE": [192, 3], + "TURING": [192, 3] + }, + "GPUTPCNeighboursFinder": { + "default": 256, + "MI100": [192, 8], + "VEGA": [960, 8], + "AMPERE": [640, 1], + "TURING": [640, 1] + }, + "GPUTPCNeighboursCleaner": { + "default": 256, + "MI100": [128, 5], + "VEGA": [384, 9], + "AMPERE": 512, + "TURING": 512 + }, + "GPUTPCExtrapolationTracking": { + "default": 256, + "MI100": [256, 7], + "VEGA": [256, 2], + "AMPERE": [128, 4], + "TURING": [192, 2] + }, + "GPUTRDTrackerKernels_gpuVersion": { + "default": 512 + }, + "GPUTPCCreateOccupancyMap_fill": { + "default": 256 + }, + "GPUTPCCreateOccupancyMap_fold": { + "default": 256 + }, + "GPUTRDTrackerKernels_o2Version": { + "default": 512 + }, + "GPUTPCCompressionKernels_step0attached": { + "default": 256, + "MI100": [128, 1], + "VEGA": [64, 2], + "AMPERE": [64, 2], + "TURING": 128 + }, + "GPUTPCCompressionKernels_step1unattached": { + "default": 256, + "MI100": [512, 2], + "VEGA": [512, 2], + "AMPERE": [512, 3], + "TURING": [512, 2] + }, + "GPUTPCDecompressionKernels_step0attached": { + "default": 256, + "MI100": [128, 2], + "VEGA": [128, 2], + "AMPERE": [32, 1], + "TURING": [32, 1] + }, + "GPUTPCDecompressionKernels_step1unattached": { + "default": 256, + "MI100": [64, 2], + "VEGA": [64, 2], + "AMPERE": [32, 1], + "TURING": [32, 1] + }, + "GPUTPCDecompressionUtilKernels_sortPerSectorRow": { + "default": 256 + }, + "GPUTPCDecompressionUtilKernels_countFilteredClusters": { + "default": 256 + }, + "GPUTPCDecompressionUtilKernels_storeFilteredClusters": { + "default": 256 + }, + "GPUTPCCFDecodeZS": { + "default": [128, 4], + "MI100": [64, 4], + "VEGA": [64, 1], + "AMPERE": [64, 10], + "TURING": [64, 8] + }, + "GPUTPCCFDecodeZSLink": { + "default": "GPUCA_WARP_SIZE", + "MI100": "GPUCA_WARP_SIZE", + "VEGA": "GPUCA_WARP_SIZE", + "AMPERE": "GPUCA_WARP_SIZE", + "TURING": "GPUCA_WARP_SIZE" + }, + "GPUTPCCFDecodeZSDenseLink": { + "default": "GPUCA_WARP_SIZE", + "MI100": ["GPUCA_WARP_SIZE", 4], + 
"VEGA": ["GPUCA_WARP_SIZE", 14], + "AMPERE": "GPUCA_WARP_SIZE", + "TURING": "GPUCA_WARP_SIZE" + }, + "GPUTPCCFGather": { + "default": [1024, 1], + "MI100": [1024, 5], + "VEGA": [1024, 1], + "AMPERE": [1024, 1], + "TURING": [1024, 1] + }, + "COMPRESSION_GATHER": { + "default": 1024, + "MI100": 1024, + "VEGA": 1024, + "AMPERE": 1024, + "TURING": 1024 + }, + "GPUTPCGMMergerTrackFit": { + "default": 256, + "MI100": [192, 2], + "VEGA": [64, 7], + "AMPERE": [64, 4], + "TURING": [32, 8] + }, + "GPUTPCGMMergerFollowLoopers": { + "default": 256, + "MI100": [256, 5], + "VEGA": [256, 4], + "AMPERE": [64, 12], + "TURING": [128, 4] + }, + "GPUTPCGMMergerSectorRefit": { + "default": 256, + "MI100": [64, 4], + "VEGA": [256, 2], + "AMPERE": [32, 6], + "TURING": [64, 5] + }, + "GPUTPCGMMergerUnpackResetIds": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerUnpackGlobal": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerResolve_step0": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerResolve_step1": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerResolve_step2": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerResolve_step3": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerResolve_step4": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": [256, 4], + "TURING": [256, 4] + }, + "GPUTPCGMMergerClearLinks": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerMergeWithinPrepare": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerMergeSectorsPrepare": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": [256, 2], + 
"TURING": [256, 2] + }, + "GPUTPCGMMergerMergeBorders_step0": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": 192, + "TURING": 192 + }, + "GPUTPCGMMergerMergeBorders_step2": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": [64, 2], + "TURING": 256 + }, + "GPUTPCGMMergerMergeCE": { + "default": 256, + "MI100": 512, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerLinkExtrapolatedTracks": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerCollect": { + "default": 256, + "MI100": [768, 1], + "VEGA": [1024, 1], + "AMPERE": [256, 2], + "TURING": [128, 2] + }, + "GPUTPCGMMergerSortTracksPrepare": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerPrepareForFit_step0": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerPrepareForFit_step1": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerPrepareForFit_step2": { + "default": 256, + "MI100": 256, + "VEGA": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerFinalize_step0": { + "default": 256, + "VEGA": 256 + }, + "GPUTPCGMMergerFinalize_step1": { + "default": 256, + "VEGA": 256 + }, + "GPUTPCGMMergerFinalize_step2": { + "default": 256, + "VEGA": 256 + }, + "GPUTPCGMMergerMergeLoopers_step0": { + "default": 256 + }, + "GPUTPCGMMergerMergeLoopers_step1": { + "default": 256 + }, + "GPUTPCGMMergerMergeLoopers_step2": { + "default": 256 + }, + "GPUTPCGMO2Output_prepare": { + "default": 256 + }, + "GPUTPCGMO2Output_output": { + "default": 256 + }, + "GPUTPCStartHitsFinder": { + "default": 256, + "MI100": [1024, 2], + "VEGA": [1024, 7], + "AMPERE": 512, + "TURING": 512 + }, + "GPUTPCStartHitsSorter": { + "default": 256, + "MI100": [1024, 5], + "VEGA": [512, 7], + "AMPERE": [512, 1], + "TURING": [512, 1] + }, + "GPUTPCCFCheckPadBaseline": { + "default": 
64, + "MI100": [64, 10], + "VEGA": [64, 2], + "AMPERE": [64, 8] + }, + "GPUTPCCFChargeMapFiller_fillIndexMap": { + "default": 512, + "MI100": 512, + "VEGA": 512, + "AMPERE": 448 + }, + "GPUTPCCFChargeMapFiller_fillFromDigits": { + "default": 512, + "MI100": 512, + "VEGA": 512, + "AMPERE": 448 + }, + "GPUTPCCFChargeMapFiller_findFragmentStart": { + "default": 512, + "MI100": 512, + "VEGA": 512, + "AMPERE": 448 + }, + "GPUTPCCFPeakFinder": { + "default": 512, + "MI100": [512, 9], + "VEGA": [512, 4], + "AMPERE": 128 + }, + "GPUTPCCFNoiseSuppression": { + "default": 512, + "MI100": 512, + "VEGA": 512, + "AMPERE": 448 + }, + "GPUTPCCFDeconvolution": { + "default": 512, + "MI100": [512, 5], + "VEGA": [512, 5], + "AMPERE": 384 + }, + "GPUTPCCFClusterizer": { + "default": 512, + "MI100": [448, 3], + "VEGA": [512, 2], + "AMPERE": 448 + }, + "GPUTPCNNClusterizerKernels": { + "default": 512 + }, + "GPUTrackingRefitKernel_mode0asGPU": { + "default": 256 + }, + "GPUTrackingRefitKernel_mode1asTrackParCov": { + "default": 256 + }, + "GPUMemClean16": { + "default": ["GPUCA_THREAD_COUNT_DEFAULT", 1] + }, + "GPUitoa": { + "default": ["GPUCA_THREAD_COUNT_DEFAULT", 1] + }, + "GPUTPCCFNoiseSuppression_noiseSuppression": { + "default": "GPUCA_LB_GPUTPCCFNoiseSuppression" + }, + "GPUTPCCFNoiseSuppression_updatePeaks": { + "default": "GPUCA_LB_GPUTPCCFNoiseSuppression" + }, + "GPUTPCNNClusterizerKernels_runCfClusterizer": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + "GPUTPCNNClusterizerKernels_fillInputNNCPU": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + "GPUTPCNNClusterizerKernels_fillInputNNGPU": { + "default": 1024 + }, + "GPUTPCNNClusterizerKernels_determineClass1Labels": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + "GPUTPCNNClusterizerKernels_determineClass2Labels": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + "GPUTPCNNClusterizerKernels_publishClass1Regression": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + 
"GPUTPCNNClusterizerKernels_publishClass2Regression": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + "GPUTPCNNClusterizerKernels_publishDeconvolutionFlags": { + "default": "GPUCA_LB_GPUTPCNNClusterizerKernels" + }, + "GPUTPCCFStreamCompaction_scanStart": { + "default": "GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE" + }, + "GPUTPCCFStreamCompaction_scanUp": { + "default": "GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE" + }, + "GPUTPCCFStreamCompaction_scanTop": { + "default": "GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE" + }, + "GPUTPCCFStreamCompaction_scanDown": { + "default": "GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE" + }, + "GPUTPCCFStreamCompaction_compactDigits": { + "default": "GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE" + }, + "GPUTPCCompressionGatherKernels_unbuffered": { + "default": "GPUCA_LB_COMPRESSION_GATHER" + }, + "GPUTPCCompressionGatherKernels_buffered32": { + "default": "GPUCA_LB_COMPRESSION_GATHER" + }, + "GPUTPCCompressionGatherKernels_buffered64": { + "default": "GPUCA_LB_COMPRESSION_GATHER" + }, + "GPUTPCCompressionGatherKernels_buffered128": { + "default": "GPUCA_LB_COMPRESSION_GATHER" + }, + "GPUTPCCompressionGatherKernels_multiBlock": { + "default": "GPUCA_LB_COMPRESSION_GATHER" + }, + "GPUTPCGMMergerFinalize_0": { + "default": 256, + "MI100": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerFinalize_1": { + "default": 256, + "MI100": 256, + "AMPERE": 256, + "TURING": 256 + }, + "GPUTPCGMMergerFinalize_2": { + "default": 256, + "MI100": 256, + "AMPERE": 256, + "TURING": 256 + } + }, + "PAR": { + "AMD_EUS_PER_CU": { + "default": 0, + "default_cpu": 0, + "MI100": 4, + "VEGA": 4 + }, + "SORT_STARTHITS": { + "default": 1, + "default_cpu": 0 + }, + "NEIGHBOURS_FINDER_MAX_NNEIGHUP": { + "default": 6, + "default_cpu": 0, + "MI100": 10, + "VEGA": 4, + "AMPERE": 4, + "TURING": 4 + }, + "NEIGHBOURS_FINDER_UNROLL_GLOBAL": { + "default": 4, + "default_cpu": 0, + "MI100": 4, + "VEGA": 2 + }, + "NEIGHBOURS_FINDER_UNROLL_SHARED": { + "default": 1, + "default_cpu": 0, + "MI100": 0, + 
"VEGA": 0 + }, + "TRACKLET_SELECTOR_HITS_REG_SIZE": { + "default": 12, + "default_cpu": 0, + "MI100": 9, + "VEGA": 27, + "AMPERE": 20, + "TURING": 20 + }, + "ALTERNATE_BORDER_SORT": { + "default": 0, + "default_cpu": 0, + "MI100": 1, + "VEGA": 1, + "AMPERE": 1, + "TURING": 1 + }, + "SORT_BEFORE_FIT": { + "default": 0, + "default_cpu": 0, + "MI100": 1, + "VEGA": 1, + "AMPERE": 1, + "TURING": 1 + }, + "NO_ATOMIC_PRECHECK": { + "default": 0, + "default_cpu": 0, + "MI100": 1, + "VEGA": 1, + "AMPERE": 1, + "TURING": 1 + }, + "DEDX_STORAGE_TYPE": { + "default": "float", + "default_cpu": "float", + "MI100": "uint16_t", + "VEGA": "uint16_t", + "AMPERE": "uint16_t", + "TURING": "uint16_t" + }, + "MERGER_INTERPOLATION_ERROR_TYPE": { + "default": "float", + "default_cpu": "float", + "MI100": "half", + "VEGA": "half", + "AMPERE": "half", + "TURING": "half" + }, + "COMP_GATHER_KERNEL": { + "default": 0, + "default_cpu": 0, + "MI100": 4, + "VEGA": 4, + "AMPERE": 4, + "TURING": 4 + }, + "COMP_GATHER_MODE": { + "default": 2, + "default_cpu": 0, + "MI100": 3, + "VEGA": 3, + "AMPERE": 3, + "TURING": 3 + }, + "CF_SCAN_WORKGROUP_SIZE": { + "default": 512, + "default_cpu": 0 + } + } +} diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index d70fac115eab7..6753e5c64288d 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -623,7 +623,7 @@ AddSubConfig(GPUSettingsEG, EG) EndConfig() #endif // BeginConfig -//Settings for the O2 workfllow +//Settings for the O2 workflow #if !defined(QCONFIG_PARSER_CXX) && (defined(GPUCA_O2_LIB) || defined(GPUCA_O2_INTERFACE)) BeginSubConfig(GPUSettingsO2, global, configStandalone, "O2", 0, "O2 workflow settings", global) AddOption(solenoidBzNominalGPU, float, -1e6f, "", 0, "Field strength of solenoid Bz in kGaus") diff --git a/GPU/GPUTracking/cmake/generateGPUParamHeader.cmake b/GPU/GPUTracking/cmake/generateGPUParamHeader.cmake new file 
mode 100644
index 0000000000000..5bc7e7bf48b22
--- /dev/null
+++ b/GPU/GPUTracking/cmake/generateGPUParamHeader.cmake
@@ -0,0 +1,75 @@
+# Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+# All rights not expressly granted are reserved.
+#
+# This software is distributed under the terms of the GNU General Public
+# License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+#
+# In applying this license CERN does not waive the privileges and immunities
+# granted to it by virtue of its status as an Intergovernmental Organization
+# or submit itself to any jurisdiction.
+
+# file generateGPUParamHeader.cmake
+# author Gabriele Cimador
+
+# generate_gpu_param_header(GPU_ARCH OUT_HEADER)
+#
+# Adds a build rule that generates the header OUT_HEADER from
+# Definitions/GPUParameters.json by running gpu_param_header_generator.cmake in
+# script mode, and an ALL target GPU_PARAM_HEADER_${GPU_ARCH}_ALL that depends on it.
+#
+# GPU_ARCH   - architecture key to select in the JSON file, or "AUTO" to derive
+#              it from CUDA_COMPUTETARGET / HIP_AMDGPUTARGET ("UNKNOWN" if neither
+#              names a recognised target).
+# OUT_HEADER - path of the header to generate.
+#
+# Example: generate_gpu_param_header("AUTO" ${ON_THE_FLY_DIR}/GPUDefParametersDefaults.h)
+function(generate_gpu_param_header GPU_ARCH OUT_HEADER)
+  # Locate the inputs relative to this file instead of CMAKE_SOURCE_DIR and
+  # CMAKE_CURRENT_SOURCE_DIR, so the result does not depend on the top-level
+  # project or on the directory of the caller.
+  get_filename_component(GPU_PARAM_JSON
+    "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../Definitions/GPUParameters.json" ABSOLUTE)
+  set(GENERATOR_SCRIPT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/gpu_param_header_generator.cmake")
+
+  set(TARGET_ARCH_SHORT "UNKNOWN")
+  if("${GPU_ARCH}" STREQUAL "AUTO")
+    if(CUDA_COMPUTETARGET)
+      if(CUDA_COMPUTETARGET MATCHES "86" OR CUDA_COMPUTETARGET MATCHES "89")
+        set(TARGET_ARCH_SHORT "AMPERE")
+      endif()
+      if(CUDA_COMPUTETARGET MATCHES "75")
+        set(TARGET_ARCH_SHORT "TURING")
+      endif()
+    endif()
+    # Evaluated after CUDA, so a recognised HIP target overrides a CUDA one.
+    if(HIP_AMDGPUTARGET)
+      if(HIP_AMDGPUTARGET MATCHES "gfx906")
+        set(TARGET_ARCH_SHORT "VEGA")
+      endif()
+      if(HIP_AMDGPUTARGET MATCHES "gfx908")
+        set(TARGET_ARCH_SHORT "MI100")
+      endif()
+      if(HIP_AMDGPUTARGET MATCHES "gfx90a")
+        set(TARGET_ARCH_SHORT "MI210")
+      endif()
+    endif()
+  else()
+    set(TARGET_ARCH_SHORT "${GPU_ARCH}")
+  endif()
+
+  add_custom_command(
+    OUTPUT "${OUT_HEADER}"
+    COMMAND "${CMAKE_COMMAND}"
+            "-DOUT_HEADER=${OUT_HEADER}"
+            "-DGPU_PARAM_JSON=${GPU_PARAM_JSON}"
+            "-DTARGET_ARCH_SHORT=${TARGET_ARCH_SHORT}"
+            -P "${GENERATOR_SCRIPT}"
+    DEPENDS
+      "${GENERATOR_SCRIPT}"
+      "${GPU_PARAM_JSON}"
+    COMMENT "Generating GPU parameter header for ${TARGET_ARCH_SHORT}"
+    VERBATIM
+  )
+  add_custom_target(GPU_PARAM_HEADER_${GPU_ARCH}_ALL ALL DEPENDS "${OUT_HEADER}")
+endfunction()
diff --git a/GPU/GPUTracking/cmake/gpu_param_header_generator.cmake b/GPU/GPUTracking/cmake/gpu_param_header_generator.cmake
new file mode 100644
index 0000000000000..059752a335e4e
--- /dev/null
+++ b/GPU/GPUTracking/cmake/gpu_param_header_generator.cmake
@@ -0,0 +1,112 @@
+# Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+# All rights not expressly granted are reserved.
+#
+# This software is distributed under the terms of the GNU General Public
+# License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+#
+# In applying this license CERN does not waive the privileges and immunities
+# granted to it by virtue of its status as an Intergovernmental Organization
+# or submit itself to any jurisdiction.
+
+# file gpu_param_header_generator.cmake
+# author Gabriele Cimador
+
+# Script-mode generator (cmake -P) of the GPU default parameter header.
+#
+# Variables expected on the command line (-D):
+#   GPU_PARAM_JSON    - JSON file with the CORE, LB and PAR parameter tables
+#   OUT_HEADER        - header file to write
+#   TARGET_ARCH_SHORT - key of the architecture-specific values to emit; a
+#                       parameter without that key gets only its "default" value
+
+cmake_minimum_required(VERSION 3.19) # string(JSON ...) was added in CMake 3.19
+
+foreach(required_var IN ITEMS GPU_PARAM_JSON OUT_HEADER)
+  if("${${required_var}}" STREQUAL "")
+    message(FATAL_ERROR "gpu_param_header_generator.cmake: ${required_var} must be passed with -D")
+  endif()
+endforeach()
+
+# Sets the variable named out_var to the macro name of parameter param_name in table type.
+function(gpu_param_macro_name type param_name out_var)
+  if("${type}" STREQUAL "LB")
+    set(${out_var} "GPUCA_LB_${param_name}" PARENT_SCOPE)
+  elseif("${type}" STREQUAL "PAR")
+    set(${out_var} "GPUCA_PAR_${param_name}" PARENT_SCOPE)
+  else()
+    set(${out_var} "GPUCA_${param_name}" PARENT_SCOPE)
+  endif()
+endfunction()
+
+# Sets the variable named out_var to the macro body for the JSON text json_value: the
+# enclosing array brackets and all double quotes are removed, e.g. [ 256, 7 ] -> 256, 7.
+function(gpu_param_macro_value json_value out_var)
+  # Arrays come back from string(JSON GET) as JSON text; fold any line breaks
+  # into single spaces so the value always fits on one #define line.
+  string(REGEX REPLACE "[ \t\r\n]+" " " value "${json_value}")
+  string(REGEX REPLACE "^\\[ *" "" value "${value}")
+  string(REGEX REPLACE " *\\]$" "" value "${value}")
+  string(REGEX REPLACE "\"" "" value "${value}")
+  set(${out_var} "${value}" PARENT_SCOPE)
+endfunction()
+
+# Appends to the variable named text_var one entry per parameter of table
+# type, using the value stored under key.
+#   OVERRIDE - emit "#define NAME value"; parameters without key are skipped.
+#   FALLBACK - emit the #define wrapped in #ifndef/#endif; key must exist.
+function(gpu_param_append_table text_var type key mode)
+  set(text "${${text_var}}")
+  string(JSON n_params LENGTH "${JSON_CONTENT}" "${type}")
+  if(n_params GREATER 0) # RANGE 0 -1 would not be an empty loop
+    math(EXPR last "${n_params} - 1")
+    foreach(i RANGE 0 ${last})
+      string(JSON param_name MEMBER "${JSON_CONTENT}" "${type}" ${i})
+      gpu_param_macro_name("${type}" "${param_name}" macro_name)
+      if("${mode}" STREQUAL "OVERRIDE")
+        string(JSON json_value ERROR_VARIABLE json_error
+               GET "${JSON_CONTENT}" "${type}" "${param_name}" "${key}")
+        if("${json_error}" STREQUAL "NOTFOUND") # NOTFOUND means the lookup succeeded
+          gpu_param_macro_value("${json_value}" macro_value)
+          string(APPEND text "#define ${macro_name} ${macro_value}\n")
+        endif()
+      else()
+        string(JSON json_value GET "${JSON_CONTENT}" "${type}" "${param_name}" "${key}")
+        gpu_param_macro_value("${json_value}" macro_value)
+        string(APPEND text "#ifndef ${macro_name}\n #define ${macro_name} ${macro_value}\n#endif\n\n")
+      endif()
+    endforeach()
+  endif()
+  set(${text_var} "${text}" PARENT_SCOPE)
+endfunction()
+
+file(READ "${GPU_PARAM_JSON}" JSON_CONTENT)
+get_filename_component(json_file_name "${GPU_PARAM_JSON}" NAME)
+set(gpu_guard "defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)")
+set(TYPES CORE LB PAR)
+
+# The header is assembled in memory and written once at the end.
+set(header "#ifndef GPUDEFPARAMETERSDEFAULTS_H\n#define GPUDEFPARAMETERSDEFAULTS_H\n\n")
+string(APPEND header "// This file is auto-generated from ${json_file_name}. Do not edit directly.\n// ${TARGET_ARCH_SHORT} architecture has been specified.\n\n")
+string(APPEND header "#if ${gpu_guard} // Avoid including for RTC generation besides normal include protection.\n\n")
+
+# Values of the selected architecture
+foreach(TYPE IN LISTS TYPES)
+  gpu_param_append_table(header "${TYPE}" "${TARGET_ARCH_SHORT}" OVERRIDE)
+endforeach()
+
+# Default parameters
+string(APPEND header "\n// Default parameters if not defined for the target architecture\n\n")
+foreach(TYPE IN LISTS TYPES)
+  gpu_param_append_table(header "${TYPE}" "default" FALLBACK)
+endforeach()
+string(APPEND header "#endif // ${gpu_guard}\n\n")
+
+# Defaults for non-LB parameters also for CPU fallback
+string(APPEND header "#ifndef GPUCA_GPUCODE_GENRTC //Defaults for non-LB parameters also for CPU fallback\n\n")
+gpu_param_append_table(header "PAR" "default_cpu" FALLBACK)
+string(APPEND header "\n#endif // GPUCA_GPUCODE_GENRTC\n")
+string(APPEND header "\n#endif // GPUDEFPARAMETERSDEFAULTS_H\n")
+
+file(WRITE "${OUT_HEADER}" "${header}")
+message(STATUS "Generated ${OUT_HEADER}")
diff --git a/GPU/documentation/build-O2.md b/GPU/documentation/build-O2.md
index dd21f7e154a63..b04fe562b8c2f 100644
--- a/GPU/documentation/build-O2.md
+++ b/GPU/documentation/build-O2.md
@@
-37,7 +37,7 @@ Advantages: - One can see enabled GPU features / versions / architectures in the version string of `gpu-system`. Disadvantages: -- Need system `CMake` >= `3.26` for the detsction at aliBuild level. +- Need system `CMake` >= `3.26` for the detection at aliBuild level. - `FindO2GPU.cmake` is duplicated in O2 and alidist and must be kept in sync. But at least this is checked and gives an error otherwise. - Running cmake during the system check takes around 5 sec for every aliBuild command involving O2 or ONNX.