Program Listing for File DeviceMacroProperty.cuh
↰ Return to documentation for file (include/flamegpu/runtime/environment/DeviceMacroProperty.cuh)
#ifndef INCLUDE_FLAMEGPU_RUNTIME_ENVIRONMENT_DEVICEMACROPROPERTY_CUH_
#define INCLUDE_FLAMEGPU_RUNTIME_ENVIRONMENT_DEVICEMACROPROPERTY_CUH_
#include <cstdint>
#include <limits>
#include <algorithm>
#ifdef __CUDACC__
#include <cuda_runtime.h>
// Needs to be mutually exclusive with definitions in CUDA's sm_60_atomic_functions.h
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
// Software fallback for double-precision atomicAdd, compiled only when __CUDA_ARCH__ < 600
// (see the guard above); hardware atomicAdd(double*, double) exists from SM60 onwards.
// Implements the canonical compare-and-swap loop from the CUDA C++ Programming Guide.
// Returns the value held at address before this thread's addition, matching atomicAdd semantics.
__device__ __forceinline__ double atomicAdd(double* address, double val) {
// cpplint enforces uint64_t, but atomicCAS is implemented for unsigned long long int
unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address); // NOLINT(runtime/int)
unsigned long long int old = *address_as_ull; // NOLINT(runtime/int)
unsigned long long int assumed = old; // NOLINT(runtime/int)
do {
assumed = old;
// Reinterpret the observed bits as a double, add val, and attempt to publish the sum.
// atomicCAS returns the previous stored value: a successful swap leaves old == assumed,
// terminating the loop; otherwise old carries the newer value for the next attempt.
old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
} while (assumed != old);
return __longlong_as_double(old);
}
#endif // __CUDA_ARCH__ < 600
#endif // __CUDACC__
namespace flamegpu {
// Read-only device-side view of an environment macro property.
// T is the element type; I, J, K, W are the dimensions of this view.
// A fully-indexed scalar view has I == J == K == W == 1; operator[] peels
// off the leading dimension one level at a time.
template<typename T, unsigned int I = 1, unsigned int J = 1, unsigned int K = 1, unsigned int W = 1>
class ReadOnlyDeviceMacroProperty {
protected:
// Pointer into the property buffer in device memory.
// Seatbelts error paths propagate nullptr here so later accesses can fail softly.
T* ptr;
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
// Per-property flag word for detecting read/atomic-write mixing within a layer:
// bit 0 records that a read occurred, bit 1 that an atomic write occurred.
unsigned int* read_write_flag;
// Record a read; raises a device exception if an atomic write was already recorded.
__device__ void setCheckReadFlag() const;
// Record an atomic write; raises a device exception if a read was already recorded.
__device__ void setCheckWriteFlag() const;
#endif
public:
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
__device__ explicit ReadOnlyDeviceMacroProperty(T* _ptr, unsigned int* _rwf);
#else
__device__ explicit ReadOnlyDeviceMacroProperty(T* _ptr);
#endif
// Drop the leading dimension: returns a read-only view over sub-array i.
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, J, K, W, 1> operator[](unsigned int i) const;
// Implicit conversion reading the scalar value; only valid once fully indexed.
__device__ __forceinline__ operator T() const;
};
// Read-write device-side view of an environment macro property.
// Extends the read-only view with mutation operations; all mutation goes through
// CUDA atomic intrinsics so concurrent updates from many agents are safe.
template<typename T, unsigned int I = 1, unsigned int J = 1, unsigned int K = 1, unsigned int W = 1>
class DeviceMacroProperty : public ReadOnlyDeviceMacroProperty<T, I, J, K, W> {
public:
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
__device__ explicit DeviceMacroProperty(T* _ptr, unsigned int *_rwf);
#else
__device__ explicit DeviceMacroProperty(T* _ptr);
#endif
// Drop the leading dimension: returns a writeable view over sub-array i.
__device__ __forceinline__ DeviceMacroProperty<T, J, K, W, 1> operator[](unsigned int i) const;
// Atomic in-place add/subtract; each returns *this for chaining.
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& operator +=(T val);
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& operator -=(T val);
// Non-mutating arithmetic against the current stored value.
__device__ __forceinline__ T operator+(T val) const;
__device__ __forceinline__ T operator-(T val) const;
// Atomic pre-increment/decrement: return the NEW value (uint32_t only, wraps at the type limit).
__device__ __forceinline__ T operator++();
__device__ __forceinline__ T operator--();
// Atomic post-increment/decrement: return the PREVIOUS value (uint32_t only).
__device__ __forceinline__ T operator++(int);
__device__ __forceinline__ T operator--(int);
// Atomic min/max: store the smaller/larger of the current value and val;
// returns the result this thread's operation stored.
__device__ __forceinline__ T min(T val);
__device__ __forceinline__ T max(T val);
// Atomic compare-and-swap: stores val iff the current value equals compare; returns the previous value.
__device__ __forceinline__ T CAS(T compare, T val);
// Atomic exchange: stores val; returns the previous value.
__device__ __forceinline__ T exchange(T val);
};
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
// Seatbelts constructor: wrap the property buffer and the shared read/write tracking flag word.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, I, J, K, W>::ReadOnlyDeviceMacroProperty(T* _ptr, unsigned int* _rwf)
: ptr(_ptr)
, read_write_flag(_rwf)
{ }
// Seatbelts constructor: forward buffer pointer and tracking flag to the read-only base.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>::DeviceMacroProperty(T* _ptr, unsigned int* _rwf)
: ReadOnlyDeviceMacroProperty<T, I, J, K, W>(_ptr, _rwf)
{ }
#ifdef __CUDACC__
// Record a read of this macro property during the current layer, and raise a
// device exception if an atomic write has already been recorded this layer,
// since mixing the two within one layer may race.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ void ReadOnlyDeviceMacroProperty<T, I, J, K, W>::setCheckReadFlag() const {
    const unsigned int READ_BIT = 1u << 0;
    const unsigned int WRITE_BIT = 1u << 1;
    // Set the read bit; atomicOr() yields the flag word as it was beforehand.
    const unsigned int previous = atomicOr(read_write_flag, READ_BIT);
    if (previous & WRITE_BIT) {
        DTHROW("DeviceMacroProperty read and atomic write operations cannot be mixed in the same layer, as this may cause race conditions.\n");
    }
}
// Record an atomic write to this macro property during the current layer, and
// raise a device exception if a read has already been recorded this layer,
// since mixing the two within one layer may race.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ void ReadOnlyDeviceMacroProperty<T, I, J, K, W>::setCheckWriteFlag() const {
// Set the write bit (bit 1); atomicOr() returns the prior flag state.
const unsigned int old = atomicOr(read_write_flag, 1u << 1);
if (old & 1u << 0) {
// Fixed: message punctuation now matches the read-side check ("layer, as").
DTHROW("DeviceMacroProperty read and atomic write operations cannot be mixed in the same layer, as this may cause race conditions.\n");
return;
}
}
#endif
#else
// Non-seatbelts constructor: wrap the property buffer only (no read/write tracking).
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, I, J, K, W>::ReadOnlyDeviceMacroProperty(T* _ptr)
:ptr(_ptr)
{ }
// Non-seatbelts constructor: forward the raw buffer pointer to the read-only base.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>::DeviceMacroProperty(T* _ptr)
    : ReadOnlyDeviceMacroProperty<T, I, J, K, W>(_ptr) { }
#endif
// Drop the leading dimension of the view, returning a read-only view over sub-array i.
// In seatbelts builds a dimension mismatch or out-of-bounds index raises a device
// exception and yields a null view, which later accesses propagate silently.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, J, K, W, 1> ReadOnlyDeviceMacroProperty<T, I, J, K, W>::operator[](unsigned int i) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I == 1 && J == 1 && K == 1 && W == 1) {
        DTHROW("Indexing error, property has less dimensions.\n");
        return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    } else if (i >= I) {
        DTHROW("Indexing error, out of bounds %u >= %u.\n", i, I);
        return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    } else if (this->ptr == nullptr) {
        // Propagate a null view produced by an earlier indexing error.
        return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    }
    // Flattened element offset: (i * J * K * W) + (j * K * W) + (k * W) + w
    return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W), this->read_write_flag);
#else
    // Fixed: construct the declared return type directly. Previously this branch built the
    // derived DeviceMacroProperty, relying on a slicing copy to the base class on return.
    return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W));
#endif
}
// Drop the leading dimension of the view, returning a writeable view over sub-array i.
// In seatbelts builds a dimension mismatch or out-of-bounds index raises a device
// exception and yields a null view, which later accesses propagate silently.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, J, K, W, 1> DeviceMacroProperty<T, I, J, K, W>::operator[](unsigned int i) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I == 1 && J == 1 && K == 1 && W == 1) {
        DTHROW("Indexing error, property has less dimensions.\n");
        return DeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    }
    if (i >= I) {
        DTHROW("Indexing error, out of bounds %u >= %u.\n", i, I);
        return DeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    }
    if (this->ptr == nullptr) {
        // Propagate a null view produced by an earlier indexing error.
        return DeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    }
    // Flattened element offset: (i * J * K * W) + (j * K * W) + (k * W) + w
    return DeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W), this->read_write_flag);
#else
    // Flattened element offset: (i * J * K * W) + (j * K * W) + (k * W) + w
    return DeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W));
#endif
}
// Read the scalar value of a fully-indexed property view.
// Returns a default-constructed T on a dimension error or null view (seatbelts builds).
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, I, J, K, W>::operator T() const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    // Reading is only valid once every dimension has been indexed away.
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    }
    if (this->ptr == nullptr) {
        return { };
    }
    // Flag the read so mixing with atomic writes in the same layer is detected.
    this->setCheckReadFlag();
#endif
    return *this->ptr;
}
// Atomic in-place addition of val; returns *this for chaining.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& DeviceMacroProperty<T, I, J, K, W>::operator+=(const T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value ||
        std::is_same<T, float>::value ||
        std::is_same<T, double>::value, "atomic add only supports the types int32_t/uint32_t/uint64_t/float/double.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return *this;
    }
    if (this->ptr == nullptr) {
        return *this;
    }
    // Flag the write so mixing with reads in the same layer is detected.
    this->setCheckWriteFlag();
#endif
    atomicAdd(this->ptr, val);
    return *this;
}
// Atomic in-place subtraction of val; returns *this for chaining.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& DeviceMacroProperty<T, I, J, K, W>::operator-=(const T val) {
    static_assert(std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value, "atomic subtract only supports the types int32_t/uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return *this;
    }
    if (this->ptr == nullptr) {
        return *this;
    }
    // Flag the write so mixing with reads in the same layer is detected.
    this->setCheckWriteFlag();
#endif
    atomicSub(this->ptr, val);
    return *this;
}
// Non-mutating addition: returns the current stored value plus val.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator+(const T val) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    }
    if (this->ptr == nullptr) {
        return { };
    }
    // This is a plain (non-atomic) read, so only the read flag is set.
    this->setCheckReadFlag();
#endif
    return *this->ptr + val;
}
// Non-mutating subtraction: returns the current stored value minus val.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator-(const T val) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    }
    if (this->ptr == nullptr) {
        return { };
    }
    // This is a plain (non-atomic) read, so only the read flag is set.
    this->setCheckReadFlag();
#endif
    return *this->ptr - val;
}
// Atomic pre-increment: increments the stored value and returns the NEW value.
// Wraps to 0 (rather than overflowing) when the stored value reaches the type maximum.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator++() {
static_assert(std::is_same<T, uint32_t>::value, "atomic increment only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
if (I != 1 || J != 1 || K != 1 || W != 1) {
DTHROW("Indexing error, property has more dimensions.\n");
// Returning *this where T is expected invokes the inherited operator T(),
// which re-checks this condition and yields a default-constructed value here.
return *this;
} else if (this->ptr == nullptr) {
return *this;
}
this->setCheckWriteFlag();
#endif
// atomicInc returns the PREVIOUS value; it stores 0 when old >= limit, else old + 1.
const T old = atomicInc(this->ptr, std::numeric_limits<T>::max());
// Reconstruct the stored (new) value from the returned previous value.
return ((old >= std::numeric_limits<T>::max()) ? 0 : (old + 1));
}
// Atomic pre-decrement: decrements the stored value and returns the NEW value.
// Wraps to the type maximum (rather than underflowing) when the stored value is 0.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator--() {
static_assert(std::is_same<T, uint32_t>::value, "atomic decrement only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
if (I != 1 || J != 1 || K != 1 || W != 1) {
DTHROW("Indexing error, property has more dimensions.\n");
// Returning *this where T is expected invokes the inherited operator T(),
// which re-checks this condition and yields a default-constructed value here.
return *this;
} else if (this->ptr == nullptr) {
return *this;
}
this->setCheckWriteFlag();
#endif
// atomicDec returns the PREVIOUS value; it stores the limit when old == 0 or old > limit, else old - 1.
const T old = atomicDec(this->ptr, std::numeric_limits<T>::max());
// Reconstruct the stored (new) value; note old > max is unreachable for uint32_t and
// is kept only to mirror atomicDec's documented behaviour.
return (((old == 0) || (old > std::numeric_limits<T>::max())) ? std::numeric_limits<T>::max() : (old - 1));
}
// Atomic post-increment: increments the stored value, returning the PREVIOUS value.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator++(int) {
    static_assert(std::is_same<T, uint32_t>::value, "atomic increment only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    }
    if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    // atomicInc wraps the stored value to 0 at the type maximum and returns the old value.
    return atomicInc(this->ptr, std::numeric_limits<T>::max());
}
// Atomic post-decrement: decrements the stored value, returning the PREVIOUS value.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator--(int) {
    static_assert(std::is_same<T, uint32_t>::value, "atomic decrement only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    }
    if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    // atomicDec wraps the stored value to the type maximum at 0 and returns the old value.
    return atomicDec(this->ptr, std::numeric_limits<T>::max());
}
// Atomic minimum: stores the smaller of the current value and val, and returns
// the result this thread's operation stored.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::min(T val) {
static_assert(std::is_same<T, int32_t>::value ||
std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value, "atomic min only supports the types int32_t/uint32_t/uint64_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
if (I != 1 || J != 1 || K != 1 || W != 1) {
DTHROW("Indexing error, property has more dimensions.\n");
return { };
} else if (this->ptr == nullptr) {
return { };
}
this->setCheckWriteFlag();
#endif
// atomicMin returns the value held BEFORE the operation; folding it with val via
// std::min reproduces the value this thread's operation stored.
// NOTE(review): std::min in device code presumably relies on the build enabling
// --expt-relaxed-constexpr — confirm against the project's CMake flags.
return std::min(atomicMin(this->ptr, val), val);
}
// Atomic maximum: stores the larger of the current value and val, and returns
// the result this thread's operation stored.
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::max(T val) {
static_assert(std::is_same<T, int32_t>::value ||
std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value, "atomic max only supports the types int32_t/uint32_t/uint64_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
if (I != 1 || J != 1 || K != 1 || W != 1) {
DTHROW("Indexing error, property has more dimensions.\n");
return { };
} else if (this->ptr == nullptr) {
return { };
}
this->setCheckWriteFlag();
#endif
// atomicMax returns the value held BEFORE the operation; folding it with val via
// std::max reproduces the value this thread's operation stored.
return std::max(atomicMax(this->ptr, val), val);
}
// Atomic compare-and-swap: stores val iff the current value equals compare.
// Returns the value held before the operation (== compare on success).
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::CAS(T compare, T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value ||
        std::is_same<T, uint16_t>::value, "atomic compare and swap only supports the types int32_t/uint32_t/uint64_t/uint16_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    }
    if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    return atomicCAS(this->ptr, compare, val);
}
// GCC doesn't like seeing atomicExch with host compiler
#ifdef __CUDACC__
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_suppress = initialization_not_reachable
#else
#pragma diag_suppress = initialization_not_reachable
#endif // __NVCC_DIAG_PRAGMA_SUPPORT__
// Atomic exchange: stores val and returns the previously held value.
// Exchanges the raw bit pattern via reinterpret_cast, since atomicExch has no
// overloads for every supported T (e.g. int64_t/double go through unsigned long long int).
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::exchange(T val) {
static_assert(std::is_same<T, int32_t>::value ||
std::is_same<T, int64_t>::value ||
std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value ||
std::is_same<T, float>::value ||
std::is_same<T, double>::value, "atomic exchange only supports the types int32_t/int64_t/uint32_t/uint64_t/float/double.");
static_assert(sizeof(uint64_t) == sizeof(unsigned long long int), "uint64_t != unsigned long long int."); // NOLINT(runtime/int)
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
if (I != 1 || J != 1 || K != 1 || W != 1) {
DTHROW("Indexing error, property has more dimensions.\n");
return { };
} else if (this->ptr == nullptr) {
return { };
}
this->setCheckWriteFlag();
#endif
// sizeof(T) is a compile-time constant, so each instantiation takes exactly one branch;
// the untaken branch still compiles, hence the pointer casts on both paths must be legal for all T.
if (sizeof(T) == sizeof(uint64_t)) { // Convert all 64 bit types to unsigned long long int (can't build as uint64_t on gcc)
const unsigned long long int rval = atomicExch(reinterpret_cast<unsigned long long int*>(this->ptr), *reinterpret_cast<unsigned long long int*>(&val)); // NOLINT(runtime/int)
// Reinterpret the returned bits back as T.
return *reinterpret_cast<const T*>(&rval);
}
// else 32-bit
const uint32_t rval = atomicExch(reinterpret_cast<uint32_t*>(this->ptr), *reinterpret_cast<uint32_t*>(&val));
return *reinterpret_cast<const T*>(&rval);
// return atomicExch(this->ptr, val);
}
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_default = initialization_not_reachable
#else
#pragma diag_default = initialization_not_reachable
#endif // __NVCC_DIAG_PRAGMA_SUPPORT__
#endif // __CUDACC__
} // namespace flamegpu
#endif // INCLUDE_FLAMEGPU_RUNTIME_ENVIRONMENT_DEVICEMACROPROPERTY_CUH_