Program Listing for File DeviceMacroProperty.cuh

Return to documentation for file (include/flamegpu/runtime/environment/DeviceMacroProperty.cuh)

#ifndef INCLUDE_FLAMEGPU_RUNTIME_ENVIRONMENT_DEVICEMACROPROPERTY_CUH_
#define INCLUDE_FLAMEGPU_RUNTIME_ENVIRONMENT_DEVICEMACROPROPERTY_CUH_

#include <cstdint>
#include <limits>
#include <algorithm>

#ifdef __CUDACC__
#include <cuda_runtime.h>
// Needs to be mutually exclusive with definitions in CUDA's sm_60_atomic_functions.h
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
__device__ __forceinline__ double atomicAdd(double* address, double val) {
    // cpplint enforces uint64_t, but atomicCAS is implemented for unsigned long long int
    unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);  // NOLINT(runtime/int)
    unsigned long long int old = *address_as_ull;  // NOLINT(runtime/int)
    unsigned long long int assumed = old;  // NOLINT(runtime/int)
    do {
        assumed = old;
        old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
    } while (assumed != old);
    return __longlong_as_double(old);
}
#endif  // __CUDA_ARCH__ < 600
#endif  // __CUDAACC__

namespace flamegpu {

template<typename T, unsigned int I = 1, unsigned int J = 1, unsigned int K = 1, unsigned int W = 1>
class ReadOnlyDeviceMacroProperty {
 protected:
    T* ptr;
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    unsigned int* read_write_flag;
    __device__ void setCheckReadFlag() const;
    __device__ void setCheckWriteFlag() const;
#endif

 public:
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
     __device__ explicit ReadOnlyDeviceMacroProperty(T* _ptr, unsigned int* _rwf);
#else
     __device__ explicit ReadOnlyDeviceMacroProperty(T* _ptr);
#endif
     __device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, J, K, W, 1> operator[](unsigned int i) const;
     __device__ __forceinline__ operator T() const;
};
template<typename T, unsigned int I = 1, unsigned int J = 1, unsigned int K = 1, unsigned int W = 1>
class DeviceMacroProperty : public ReadOnlyDeviceMacroProperty<T, I, J, K, W> {
 public:
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    __device__ explicit DeviceMacroProperty(T* _ptr, unsigned int *_rwf);
#else
     __device__ explicit DeviceMacroProperty(T* _ptr);
#endif
    __device__ __forceinline__ DeviceMacroProperty<T, J, K, W, 1> operator[](unsigned int i) const;
    __device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& operator +=(T val);
    __device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& operator -=(T val);
    __device__ __forceinline__ T operator+(T val) const;
    __device__ __forceinline__ T operator-(T val) const;
    __device__ __forceinline__ T operator++();
    __device__ __forceinline__ T operator--();
    __device__ __forceinline__ T operator++(int);
    __device__ __forceinline__ T operator--(int);
    __device__ __forceinline__ T min(T val);
    __device__ __forceinline__ T max(T val);
    __device__ __forceinline__ T CAS(T compare, T val);
    __device__ __forceinline__ T exchange(T val);
};

#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, I, J, K, W>::ReadOnlyDeviceMacroProperty(T* _ptr, unsigned int* _rwf)
    : ptr(_ptr)
    , read_write_flag(_rwf)
{ }
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>::DeviceMacroProperty(T* _ptr, unsigned int* _rwf)
    : ReadOnlyDeviceMacroProperty<T, I, J, K, W>(_ptr, _rwf)
{ }
#ifdef __CUDACC__
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ void ReadOnlyDeviceMacroProperty<T, I, J, K, W>::setCheckReadFlag() const {
    const unsigned int old = atomicOr(read_write_flag, 1u << 0);
    if (old & 1u << 1) {
        DTHROW("DeviceMacroProperty read and atomic write operations cannot be mixed in the same layer, as this may cause race conditions.\n");
        return;
    }
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ void ReadOnlyDeviceMacroProperty<T, I, J, K, W>::setCheckWriteFlag() const {
    const unsigned int old = atomicOr(read_write_flag, 1u << 1);
    if (old & 1u << 0) {
        DTHROW("DeviceMacroProperty read and atomic write operations cannot be mixed in the same layer as this may cause race conditions.\n");
        return;
    }
}
#endif
#else
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, I, J, K, W>::ReadOnlyDeviceMacroProperty(T* _ptr)
    :ptr(_ptr)
{ }
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I , J, K, W>::DeviceMacroProperty(T* _ptr)
    : ReadOnlyDeviceMacroProperty<T, I, J, K, W>(_ptr)
{ }
#endif
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, J, K, W, 1> ReadOnlyDeviceMacroProperty<T, I, J, K, W>::operator[](unsigned int i) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I == 1 && J == 1 && K == 1 && W == 1) {
        DTHROW("Indexing error, property has less dimensions.\n");
        return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    } else if (i >= I) {
        DTHROW("Indexing error, out of bounds %u >= %u.\n", i, I);
        return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    } else if (this->ptr == nullptr) {
        return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    }
#endif
    // (i * J * K * W) + (j * K * W) + (k * W) + w
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    return ReadOnlyDeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W), this->read_write_flag);
#else
    return DeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W));
#endif
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, J, K, W, 1> DeviceMacroProperty<T, I, J, K, W>::operator[](unsigned int i) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I == 1 && J == 1 && K == 1 && W == 1) {
        DTHROW("Indexing error, property has less dimensions.\n");
        return DeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    } else if (i >= I) {
        DTHROW("Indexing error, out of bounds %u >= %u.\n", i, I);
        return DeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    } else if (this->ptr == nullptr) {
        return DeviceMacroProperty<T, J, K, W, 1>(nullptr, nullptr);
    }
#endif
    // (i * J * K * W) + (j * K * W) + (k * W) + w
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    return DeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W), this->read_write_flag);
#else
    return DeviceMacroProperty<T, J, K, W, 1>(this->ptr + (i * J * K * W));
#endif
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ ReadOnlyDeviceMacroProperty<T, I, J, K, W>::operator T() const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckReadFlag();
#endif
    return *this->ptr;
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& DeviceMacroProperty<T, I, J, K, W>::operator+=(const T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value ||
        std::is_same<T, float>::value ||
        std::is_same<T, double>::value, "atomic add only supports the types int32_t/uint32_t/uint64_t/float/double.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return *this;
    } else if (this->ptr == nullptr) {
        return *this;
    }
    this->setCheckWriteFlag();
#endif
    atomicAdd(this->ptr, val);
    return *this;
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ DeviceMacroProperty<T, I, J, K, W>& DeviceMacroProperty<T, I, J, K, W>::operator-=(const T val) {
    static_assert(std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value, "atomic subtract only supports the types int32_t/uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return *this;
    } else if (this->ptr == nullptr) {
        return *this;
    }
    this->setCheckWriteFlag();
#endif
    atomicSub(this->ptr, val);
    return *this;
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator+(const T val) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckReadFlag();
#endif
    return *this->ptr + val;
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator-(const T val) const {
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckReadFlag();
#endif
    return *this->ptr - val;
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator++() {
    static_assert(std::is_same<T, uint32_t>::value, "atomic increment only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return *this;
    } else if (this->ptr == nullptr) {
        return *this;
    }
    this->setCheckWriteFlag();
#endif
    const T old = atomicInc(this->ptr, std::numeric_limits<T>::max());
    return ((old >= std::numeric_limits<T>::max()) ? 0 : (old + 1));
}

template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator--() {
    static_assert(std::is_same<T, uint32_t>::value, "atomic decrement only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return *this;
    } else if (this->ptr == nullptr) {
        return *this;
    }
    this->setCheckWriteFlag();
#endif
    const T old = atomicDec(this->ptr, std::numeric_limits<T>::max());
    return  (((old == 0) || (old > std::numeric_limits<T>::max())) ? std::numeric_limits<T>::max() : (old - 1));
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator++(int) {
    static_assert(std::is_same<T, uint32_t>::value, "atomic increment only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    return atomicInc(this->ptr, std::numeric_limits<T>::max());
}

template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::operator--(int) {
    static_assert(std::is_same<T, uint32_t>::value, "atomic decrement only supports the type uint32_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    return atomicDec(this->ptr, std::numeric_limits<T>::max());
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::min(T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value, "atomic min only supports the types int32_t/uint32_t/uint64_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    return std::min(atomicMin(this->ptr, val), val);
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::max(T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value, "atomic max only supports the types int32_t/uint32_t/uint64_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    return std::max(atomicMax(this->ptr, val), val);
}
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::CAS(T compare, T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value ||
        std::is_same<T, uint16_t>::value, "atomic compare and swap only supports the types int32_t/uint32_t/uint64_t/uint16_t.");
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    return atomicCAS(this->ptr, compare, val);
}

// GCC doesn't like seeing atomicExch with host compiler
#ifdef __CUDACC__
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_suppress = initialization_not_reachable
#else
#pragma diag_suppress = initialization_not_reachable
#endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
template<typename T, unsigned int I, unsigned int J, unsigned int K, unsigned int W>
__device__ __forceinline__ T DeviceMacroProperty<T, I, J, K, W>::exchange(T val) {
    static_assert(std::is_same<T, int32_t>::value ||
        std::is_same<T, int64_t>::value ||
        std::is_same<T, uint32_t>::value ||
        std::is_same<T, uint64_t>::value ||
        std::is_same<T, float>::value ||
        std::is_same<T, double>::value, "atomic exchange only supports the types int32_t/int64_t/uint32_t/uint64_t/float/double.");
    static_assert(sizeof(uint64_t) == sizeof(unsigned long long int), "uint64_t != unsigned long long int.");  // NOLINT(runtime/int)
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
    if (I != 1 || J != 1 || K != 1 || W != 1) {
        DTHROW("Indexing error, property has more dimensions.\n");
        return { };
    } else if (this->ptr == nullptr) {
        return { };
    }
    this->setCheckWriteFlag();
#endif
    if (sizeof(T) == sizeof(uint64_t)) {  // Convert all 64 bit types to unsigned long long int (can't build as uint64_t on gcc)
        const unsigned long long int rval = atomicExch(reinterpret_cast<unsigned long long int*>(this->ptr), *reinterpret_cast<unsigned long long int*>(&val));  // NOLINT(runtime/int)
        return *reinterpret_cast<const T*>(&rval);
    }
    // else 32-bit
    const uint32_t rval = atomicExch(reinterpret_cast<uint32_t*>(this->ptr), *reinterpret_cast<uint32_t*>(&val));
    return *reinterpret_cast<const T*>(&rval);
    // return atomicExch(this->ptr, val);
}
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_default = initialization_not_reachable
#else
#pragma diag_default = initialization_not_reachable
#endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
#endif  // __CUDACC__

}  // namespace flamegpu

#endif  // INCLUDE_FLAMEGPU_RUNTIME_ENVIRONMENT_DEVICEMACROPROPERTY_CUH_