Program Listing for File compute_capability.cu
↰ Return to documentation for file (src/flamegpu/detail/compute_capability.cu)
#include <nvrtc.h>
#include <cassert>
#include <vector>
#include <string>
#include "flamegpu/detail/compute_capability.cuh"
#include "flamegpu/simulation/detail/CUDAErrorChecking.cuh"
namespace flamegpu {
namespace detail {
/**
 * Query the compute capability of a CUDA device as a single integer.
 * @param deviceIndex index of the device to query
 * @return the architecture as (10 * major) + minor, e.g. 86 for SM 8.6
 * @throws exception::InvalidCUDAdevice if deviceIndex is negative or not less than the device count
 */
int compute_capability::getComputeCapability(int deviceIndex) {
    // Reject negative device indices before touching the CUDA runtime.
    if (deviceIndex < 0) {
        THROW exception::InvalidCUDAdevice();
    }
    // Reject indices beyond the number of visible devices.
    int visibleDevices = 0;
    gpuErrchk(cudaGetDeviceCount(&visibleDevices));
    if (deviceIndex >= visibleDevices) {
        THROW exception::InvalidCUDAdevice();
    }
    // Fetch the major/minor compute capability attributes for the device.
    int ccMajor = 0;
    int ccMinor = 0;
    gpuErrchk(cudaDeviceGetAttribute(&ccMinor, cudaDevAttrComputeCapabilityMinor, deviceIndex));
    gpuErrchk(cudaDeviceGetAttribute(&ccMajor, cudaDevAttrComputeCapabilityMajor, deviceIndex));
    // Pack into the conventional single-integer arch form.
    return (10 * ccMajor) + ccMinor;
}
/**
 * Get the lowest compute capability this executable was compiled for.
 * @return the minimum arch integer baked in at build time, or 0 when unknown
 */
int compute_capability::minimumCompiledComputeCapability() {
#ifdef FLAMEGPU_MIN_CUDA_ARCH
    // Provided by the build system: the lowest SM architecture targeted.
    return FLAMEGPU_MIN_CUDA_ARCH;
#else
    // Not known at compile time; 0 acts as "no minimum" so any device passes.
    return 0;
#endif
}
/**
 * Check whether a device's compute capability meets the executable's compiled minimum.
 * @param deviceIndex index of the device to check
 * @return true when the device's arch is at least the compile-time minimum
 * @throws exception::InvalidCUDAdevice if deviceIndex is invalid (via getComputeCapability)
 */
bool compute_capability::checkComputeCapability(int deviceIndex) {
    // The executable (probably) supports the device when its compute capability
    // is not below the minimum architecture it was built for.
    return getComputeCapability(deviceIndex) >= minimumCompiledComputeCapability();
}
/**
 * Get the compute capabilities supported by the NVRTC library in use.
 * @return ascending vector of supported arch integers (e.g. {35, ..., 86});
 *         empty if the runtime query fails or no fallback list applies
 */
std::vector<int> compute_capability::getNVRTCSupportedComputeCapabilties() {
    // NVRTC included with CUDA 11.2+ provides methods to query the supported architectures.
    // CUDA 11.2 also changed the soname rules such that libnvrtc.so.11.2 is valid for all nvrtc >= 11.2,
    // and libnvrtc.so.12 for all of CUDA 12.x etc, so the runtime value may differ from the compile-time
    // toolkit version for future releases — query at runtime when the API is available.
#if (__CUDACC_VER_MAJOR__ > 11) || ((__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ >= 2)
    nvrtcResult nvrtcStatus = NVRTC_SUCCESS;
    int nvrtcNumSupportedArchs = 0;
    // Query the number of architecture flags supported by this nvrtc, to allocate enough memory
    nvrtcStatus = nvrtcGetNumSupportedArchs(&nvrtcNumSupportedArchs);
    if (nvrtcStatus == NVRTC_SUCCESS && nvrtcNumSupportedArchs > 0) {
        // prepare a large enough std::vector for the results
        std::vector<int> nvrtcSupportedArchs = std::vector<int>(nvrtcNumSupportedArchs);
        // Cast to the vector's size type to avoid a signed/unsigned comparison
        // warning; the count is known to be > 0 here so the cast is safe.
        assert(nvrtcSupportedArchs.size() >= static_cast<std::vector<int>::size_type>(nvrtcNumSupportedArchs));
        nvrtcStatus = nvrtcGetSupportedArchs(nvrtcSupportedArchs.data());
        if (nvrtcStatus == NVRTC_SUCCESS) {
            // Return the populated std::vector, this should be RVO'd
            return nvrtcSupportedArchs;
        }
    }
    // If either query failed there is no way to know which archs are supported, so assume none.
    return {};
    // Older CUDAs do not support this query, but the lists are simple to hard-code
    // for CUDA 11.0/11.1 (and the deprecated CUDA 10.x).
    // CUDA 11.1 supports 35 to 86
#elif (__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ == 1
    return {35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80, 86};
    // CUDA 11.0 supports 35 to 80
#elif (__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ == 0
    return {35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80};
    // CUDA 10.x supports 30 to 75
#elif (__CUDACC_VER_MAJOR__ >= 10)
    return {30, 32, 35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75};
    // This should be all cases for FLAME GPU 2, but leave the fallback branch just in case
#else
    return {};
#endif
}
/**
 * Select the best architecture to compile for from a list of candidates.
 * @param target the device's compute capability
 * @param architectures candidate arch integers, in ascending order
 * @return the largest candidate not exceeding target, or 0 when none qualifies
 */
int compute_capability::selectAppropraiteComputeCapability(const int target, const std::vector<int>& architectures) {
    // Track the largest architecture seen so far that does not exceed the target.
    int best = 0;
    for (const int candidate : architectures) {
        if (candidate > target || candidate <= best)
            continue;
        best = candidate;
        // An exact match cannot be beaten, so stop scanning early.
        if (best == target)
            return target;
    }
    return best;
}
/**
 * Get the name of a CUDA device.
 * @param deviceIndex index of the device to query
 * @return the device name reported by the CUDA runtime
 * @throws exception::InvalidCUDAdevice if deviceIndex is negative or not less than the device count
 */
const std::string compute_capability::getDeviceName(int deviceIndex) {
    // Throw an exception if the deviceIndex is negative.
    if (deviceIndex < 0) {
        THROW exception::InvalidCUDAdevice();
    }
    // Ensure deviceIndex is valid.
    int deviceCount = 0;
    gpuErrchk(cudaGetDeviceCount(&deviceCount));
    if (deviceIndex >= deviceCount) {
        // Throw an exception if the device index is bad.
        THROW exception::InvalidCUDAdevice();
    }
    // Load device properties, checking the return status for consistency with
    // the other CUDA runtime calls in this file — an unchecked failure would
    // leave prop.name uninitialized.
    cudaDeviceProp prop;
    gpuErrchk(cudaGetDeviceProperties(&prop, deviceIndex));
    return std::string(prop.name);
}
/**
 * Get a comma-separated list of device names for a set of device indices.
 * @param devices indices of the devices to query; when empty, every visible
 *        device is included
 * @return the device names joined by ", "
 * @throws exception::InvalidCUDAdevice if any index is negative or not less than the device count
 */
const std::string compute_capability::getDeviceNames(std::set<int> devices) {
    std::string device_names;
    bool first = true;
    // Get the count of devices
    int deviceCount = 0;
    gpuErrchk(cudaGetDeviceCount(&deviceCount));
    // If no devices were passed in, add each device to the set of devices.
    if (devices.size() == 0) {
        for (int i = 0; i < deviceCount; i++) {
            devices.emplace_hint(devices.end(), i);
        }
    }
    for (int device_id : devices) {
        // Throw an exception if the deviceIndex is negative.
        if (device_id < 0) {
            THROW exception::InvalidCUDAdevice();
        }
        // Ensure deviceIndex is valid.
        if (device_id >= deviceCount) {
            // Throw an exception if the device index is bad.
            THROW exception::InvalidCUDAdevice();
        }
        // Load device properties, checking the return status for consistency
        // with the other CUDA runtime calls in this file — an unchecked failure
        // would leave prop.name uninitialized.
        cudaDeviceProp prop;
        gpuErrchk(cudaGetDeviceProperties(&prop, device_id));
        if (!first)
            device_names.append(", ");
        device_names.append(prop.name);
        first = false;
    }
    return device_names;
}
} // namespace detail
} // namespace flamegpu