.. _program_listing_file_src_flamegpu_detail_compute_capability.cu:

Program Listing for File compute_capability.cu
==============================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_flamegpu_detail_compute_capability.cu>` (``src/flamegpu/detail/compute_capability.cu``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #include <nvrtc.h>

   #include <cassert>

   #include "flamegpu/detail/compute_capability.cuh"
   #include "flamegpu/simulation/detail/CUDAErrorChecking.cuh"

   namespace flamegpu {
   namespace detail {

   int compute_capability::getComputeCapability(int deviceIndex) {
       int major = 0;
       int minor = 0;

       // Throw an exception if the deviceIndex is negative.
       if (deviceIndex < 0) {
           THROW exception::InvalidCUDAdevice();
       }

       // Ensure deviceIndex is valid.
       int deviceCount = 0;
       gpuErrchk(cudaGetDeviceCount(&deviceCount));
       if (deviceIndex >= deviceCount) {
           // Throw an excpetion if the device index is bad.
           THROW exception::InvalidCUDAdevice();
       }
       // Load device attributes
       gpuErrchk(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex));
       gpuErrchk(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex));
       // Compute the arch integer value.
       int arch = (10 * major) + minor;
       return arch;
   }

   int compute_capability::minimumCompiledComputeCapability() {
   #if defined(FLAMEGPU_MIN_CUDA_ARCH)
       return FLAMEGPU_MIN_CUDA_ARCH;
   #else
       // Return 0 as a default minimum?
       return 0;
   #endif
   }

   bool compute_capability::checkComputeCapability(int deviceIndex) {
       // If the compile time minimum architecture is defined, fetch the device's compute capability and check that the executable (probably) supports this device.
       if (getComputeCapability(deviceIndex) < minimumCompiledComputeCapability()) {
           return false;
       } else {
           return true;
       }
   }

   std::vector<int> compute_capability::getNVRTCSupportedComputeCapabilties() {
       // NVRTC included with CUDA 11.2+ includes methods to query the supported architectures and CUDA from 11.2+
       // Also changes the soname rules such that nvrtc.11.2.so is vald for all nvrtc >= 11.2, and libnvrtc.12.so for CUDA 12.x etc, so this is different at runtime not compile time for future versions, so use the methods
   #if (__CUDACC_VER_MAJOR__ > 11) || ((__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ >= 2)
       nvrtcResult nvrtcStatus = NVRTC_SUCCESS;
       int nvrtcNumSupportedArchs = 0;
       // Query the number of architecture flags supported by this nvrtc, to allocate enough memory
       nvrtcStatus = nvrtcGetNumSupportedArchs(&nvrtcNumSupportedArchs);
       if (nvrtcStatus == NVRTC_SUCCESS && nvrtcNumSupportedArchs > 0) {
           // prepare a large enough std::vector for the results
           std::vector<int> nvrtcSupportedArchs = std::vector<int>(nvrtcNumSupportedArchs);
           assert(nvrtcSupportedArchs.size() >= nvrtcNumSupportedArchs);
           nvrtcStatus = nvrtcGetSupportedArchs(nvrtcSupportedArchs.data());
           if (nvrtcStatus == NVRTC_SUCCESS) {
               // Return the populated std::vector, this should be RVO'd
               return nvrtcSupportedArchs;
           }
       }
       // If any of the above functions failed, we have no idea what arch's are supported, so assume none are?
       return {};
   // Older CUDA's do not support this, but this is simple to hard-code for CUDA 11.0/11.1 (and our deprected CUDA 10.x).
   // CUDA 11.1 suports 35 to 86
   #elif (__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ == 1
       return {35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80, 86};
   // CUDA 11.0 supports 35 to 80
   #elif (__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ == 0
       return {35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80};
   // CUDA 10.x supports 30 to 75
   #elif (__CUDACC_VER_MAJOR__ >= 10)
       return {30, 32, 35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75};
   // This should be all cases for FLAME GPU 2, but leave the fallback branch just in case
   #else
       return {};
   #endif
   }

   int compute_capability::selectAppropraiteComputeCapability(const int target, const std::vector<int>& architectures) {
       int maxArch = 0;
       for (const int &arch : architectures) {
           if (arch <= target && arch > maxArch) {
               maxArch = arch;
               // The vector is in ascending order, so we can potentially early exit
               if (arch == target) {
                   return target;
               }
           }
       }
       return maxArch;
   }

   const std::string compute_capability::getDeviceName(int deviceIndex) {
       // Throw an exception if the deviceIndex is negative.
       if (deviceIndex < 0) {
           THROW exception::InvalidCUDAdevice();
       }

       // Ensure deviceIndex is valid.
       int deviceCount = 0;
       gpuErrchk(cudaGetDeviceCount(&deviceCount));
       if (deviceIndex >= deviceCount) {
           // Throw an excpetion if the device index is bad.
           THROW exception::InvalidCUDAdevice();
       }
       // Load device properties
       cudaDeviceProp prop;
       cudaGetDeviceProperties(&prop, deviceIndex);
       return std::string(prop.name);
   }

   const std::string compute_capability::getDeviceNames(std::set<int> devices) {
       std::string device_names;
       bool first = true;

       // Get the count of devices
       int deviceCount = 0;
       gpuErrchk(cudaGetDeviceCount(&deviceCount));

       // If no devices were passed in, add each device to the set of devices.
       if (devices.size() == 0) {
           for (int i = 0; i < deviceCount; i++) {
               devices.emplace_hint(devices.end(), i);
           }
       }

       for (int device_id : devices) {
           // Throw an exception if the deviceIndex is negative.
           if (device_id < 0) {
               THROW exception::InvalidCUDAdevice();
           }
           // Ensure deviceIndex is valid.
           if (device_id >= deviceCount) {
               // Throw an exception if the device index is bad.
               THROW exception::InvalidCUDAdevice();
           }
           // Load device properties
           cudaDeviceProp prop;
           cudaGetDeviceProperties(&prop, device_id);
           if (!first) device_names.append(", ");
           device_names.append(prop.name);
           first = false;
       }
       return device_names;
   }

   }  // namespace detail
   }  // namespace flamegpu