Program Listing for File CUDAMacroEnvironment.cu

↰ Return to documentation for file (src/flamegpu/simulation/detail/CUDAMacroEnvironment.cu)
#include "flamegpu/simulation/detail/CUDAMacroEnvironment.h"

#include <string>
#include <memory>
#include <map>
#include <vector>

#include "flamegpu/model/EnvironmentDescription.h"
#include "flamegpu/simulation/CUDASimulation.h"
#include "flamegpu/model/AgentFunctionData.cuh"
#include "flamegpu/model/SubEnvironmentData.h"
#include "flamegpu/runtime/detail/curve/curve_rtc.cuh"
#include "flamegpu/detail/cuda.cuh"

namespace flamegpu {
namespace detail {

/**
 * Build the macro environment's property table from an environment description.
 * One MacroEnvProp is created per entry of description.macro_properties, using that
 * entry's type, type_size and elements. This function itself makes no CUDA calls.
 * @param description Environment description whose macro properties are mirrored
 * @param _cudaSimulation Simulation used to initialise the cudaSimulation member
 */
CUDAMacroEnvironment::CUDAMacroEnvironment(const EnvironmentData& description, const CUDASimulation& _cudaSimulation)
    : cudaSimulation(_cudaSimulation) {
    for (const auto &entry : description.macro_properties) {
        const auto &name = entry.first;
        const auto &definition = entry.second;
        properties.emplace(name, MacroEnvProp(definition.type, definition.type_size, definition.elements));
    }
}

/**
 * Allocate and zero a device buffer for every macro property that does not already have one.
 * The given stream is stored in the stream member, and this call blocks until the
 * zero-fills issued on it have completed.
 * @param _stream CUDA stream used for the memsets and the final synchronisation
 */
void CUDAMacroEnvironment::init(cudaStream_t _stream) {
    stream = _stream;
    for (auto &entry : properties) {
        auto &mp = entry.second;
        // Properties which already hold a buffer are left untouched
        if (mp.d_ptr) {
            continue;
        }
        // Bytes required for the property's data: element size times all 4 dimensions
        size_t bytes = mp.type_size;
        for (unsigned int dim = 0; dim < 4; ++dim) {
            bytes *= mp.elements[dim];
        }
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
        // Seatbelts stores a read-write flag in one extra unsigned int after the data
        bytes += sizeof(unsigned int);
#endif
        gpuErrchk(cudaMalloc(&mp.d_ptr, bytes));
        gpuErrchk(cudaMemsetAsync(mp.d_ptr, 0, bytes, _stream));
    }
    // Wait for all of the zero-fills to finish before returning
    gpuErrchk(cudaStreamSynchronize(_stream));
}

/**
 * Initialise the macro environment where some properties may be mapped to a master macro environment.
 *
 * For each property that does not already have a device pointer:
 *  - if mapping.macro_properties has no entry for it, a new zeroed device buffer is allocated;
 *  - otherwise the entry's value names a property of master_macro_env, whose device pointer is
 *    shared (is_sub is set so that free() does not release it).
 * The given stream is stored in the stream member, and this call blocks until work issued on it has completed.
 *
 * @param mapping Sub-environment mapping; macro_properties maps a local property name to a master property name
 * @param master_macro_env Macro environment holding the buffers of mapped properties
 * @param _stream CUDA stream used for the memsets and the final synchronisation
 * @throws exception::UnknownInternalError If a mapped property cannot be bound: master_macro_env is null,
 *         the master property is missing or unallocated, or its elements/type differ from the local property
 */
void CUDAMacroEnvironment::init(const SubEnvironmentData& mapping, std::shared_ptr<const detail::CUDAMacroEnvironment> master_macro_env, cudaStream_t _stream) {
    this->stream = _stream;
    for (auto& prop : properties) {
        // Properties which already hold a buffer are left untouched
        if (prop.second.d_ptr) {
            continue;
        }
        const auto sub = mapping.macro_properties.find(prop.first);
        if (sub == mapping.macro_properties.end()) {
            // If it's a local macro property, allocate and zero its own buffer
            size_t buffer_size = prop.second.type_size
                * prop.second.elements[0]
                * prop.second.elements[1]
                * prop.second.elements[2]
                * prop.second.elements[3];
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
            buffer_size += sizeof(unsigned int);  // Extra uint is used as read-write flag by seatbelts
#endif
            gpuErrchk(cudaMalloc(&prop.second.d_ptr, buffer_size));
            gpuErrchk(cudaMemsetAsync(prop.second.d_ptr, 0, buffer_size, _stream));
        } else {
            // If it's a mapped sub macro property, share the master's buffer
            bool mapped = false;
            // A null master cannot satisfy any mapping; report it via the exception below rather than dereferencing it
            if (master_macro_env) {
                const auto mmp = master_macro_env->properties.find(sub->second);
                if (mmp != master_macro_env->properties.end()
                    && mmp->second.d_ptr
                    && mmp->second.elements == prop.second.elements
                    && mmp->second.type == prop.second.type) {
                    prop.second.d_ptr = mmp->second.d_ptr;
                    prop.second.is_sub = true;
                    mapped = true;
                }
            }
            if (!mapped) {
                THROW exception::UnknownInternalError("Unable to initialise mapped macro property '%s' to '%s', this should not have failed, "
                "in CUDAMacroEnvironment::init()\n",
                prop.first.c_str(), sub->second.c_str());
            }
        }
    }
    // Wait for all of the zero-fills to finish before returning
    gpuErrchk(cudaStreamSynchronize(_stream));
}
/**
 * Release the device buffers held by the macro properties and null every device pointer.
 * Properties flagged is_sub (pointer taken from a master macro environment during init())
 * are not freed here; only their local pointer is cleared.
 */
void CUDAMacroEnvironment::free() {
    for (auto& entry : properties) {
        auto& mp = entry.second;
        if (!mp.d_ptr) {
            continue;  // Nothing was allocated or mapped for this property
        }
        const bool owns_buffer = !mp.is_sub;
        if (owns_buffer) {
            gpuErrchk(flamegpu::detail::cuda::cudaFree(mp.d_ptr));
        }
        mp.d_ptr = nullptr;
    }
}
/**
 * Register every macro property with the given host curve instance.
 * Each property is passed with its name, type, type size, flattened element count
 * (product of its 4 dimensions) and current device pointer.
 * @param curve Curve instance to register the macro properties with
 */
void CUDAMacroEnvironment::registerCurveVariables(detail::curve::HostCurve& curve) const {
    for (const auto& entry : properties) {
        const auto& name = entry.first;
        const auto& mp = entry.second;
        const auto& dims = mp.elements;
        // Flatten the 4D shape into a single element count
        const unsigned int total_elements = dims[0] * dims[1] * dims[2] * dims[3];
        curve.registerSetMacroEnvironmentProperty(name, mp.type, mp.type_size, total_elements, mp.d_ptr);
    }
}
/**
 * Register every macro property with the given RTC curve header.
 * Each property is passed with its name, device pointer, type name, type size and element dimensions.
 * @param curve_header RTC curve host object to register the macro properties with
 */
void CUDAMacroEnvironment::mapRTCVariables(detail::curve::CurveRTCHost& curve_header) const {
    for (const auto &entry : properties) {
        const auto &mp = entry.second;
        const char *name = entry.first.c_str();
        curve_header.registerEnvMacroProperty(name, mp.d_ptr, mp.type.name(), mp.type_size, mp.elements);
    }
}
/**
 * Unregister every macro property, by name, from the given RTC curve header.
 * @param curve_header RTC curve host object to unregister the macro properties from
 */
void CUDAMacroEnvironment::unmapRTCVariables(detail::curve::CurveRTCHost& curve_header) const {
    for (const auto &entry : properties) {
        const char *name = entry.first.c_str();
        curve_header.unregisterEnvMacroProperty(name);
    }
}
const std::map<std::string, CUDAMacroEnvironment::MacroEnvProp>& CUDAMacroEnvironment::getPropertiesMap() const {
    return properties;
}

/**
 * Look up cached host-side metadata for a macro property.
 * @param property_name Name of the macro property
 * @return The result of lock() on the cached entry, or nullptr if host_cache has no entry for the name.
 * @note NOTE(review): host_cache presumably stores std::weak_ptr, in which case an expired entry also yields an empty pointer - confirm against the header
 */
std::shared_ptr<HostMacroProperty_MetaData> CUDAMacroEnvironment::getHostPropertyMetadata(const std::string property_name) {
    const auto found = host_cache.find(property_name);
    if (found == host_cache.end()) {
        // Nothing has been cached under this name
        return nullptr;
    }
    return found->second.lock();
}
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
void CUDAMacroEnvironment::resetFlagsAsync(const std::vector<cudaStream_t> &streams) {
    unsigned int i = 0;
    for (const auto& prop : properties) {
        if (prop.second.d_ptr) {
            const size_t buffer_size = prop.second.type_size
                * prop.second.elements[0]
                * prop.second.elements[1]
                * prop.second.elements[2]
                * prop.second.elements[3];
            gpuErrchk(cudaMemsetAsync(static_cast<char*>(prop.second.d_ptr) + buffer_size, 0 , sizeof(unsigned int), streams[i++%streams.size()]));
        }
    }
    // Disable the sync here, users must sync themselves
    // if (properties.size()) {
    //     gpuErrchk(cudaDeviceSynchronize());
    // }
}
/**
 * Fetch the seatbelts flag word of a macro property from the device and test its read bit (bit 0).
 * The copy is issued on the stream member and this call blocks until it has completed.
 * @param property_name Name of the macro property
 * @return True if bit 0 of the flag word is set
 * @throws flamegpu::exception::InvalidEnvProperty If no macro property has the given name
 */
bool CUDAMacroEnvironment::getDeviceReadFlag(const std::string& property_name) {
    const auto found = properties.find(property_name);
    if (found == properties.end()) {
        THROW flamegpu::exception::InvalidEnvProperty("The environment macro property '%s' was not found, "
            "in CUDAMacroEnvironment::getDeviceReadFlag()\n",
            property_name.c_str());
    }
    const auto& mp = found->second;
    // The flag word sits directly after the property's data
    size_t flag_offset = mp.type_size;
    for (unsigned int dim = 0; dim < 4; ++dim) {
        flag_offset *= mp.elements[dim];
    }
    unsigned int flags = 0;
    gpuErrchk(cudaMemcpyAsync(&flags, static_cast<char*>(mp.d_ptr) + flag_offset, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
    gpuErrchk(cudaStreamSynchronize(stream));
    const unsigned int read_bit = 1u << 0;
    return (flags & read_bit) != 0;
}
/**
 * Fetch the seatbelts flag word of a macro property from the device and test its write bit (bit 1).
 * The copy is issued on the stream member and this call blocks until it has completed.
 * @param property_name Name of the macro property
 * @return True if bit 1 of the flag word is set
 * @throws flamegpu::exception::InvalidEnvProperty If no macro property has the given name
 */
bool CUDAMacroEnvironment::getDeviceWriteFlag(const std::string& property_name) {
    const auto found = properties.find(property_name);
    if (found == properties.end()) {
        THROW flamegpu::exception::InvalidEnvProperty("The environment macro property '%s' was not found, "
            "in CUDAMacroEnvironment::getDeviceWriteFlag()\n",
            property_name.c_str());
    }
    const auto& mp = found->second;
    // The flag word sits directly after the property's data
    size_t flag_offset = mp.type_size;
    for (unsigned int dim = 0; dim < 4; ++dim) {
        flag_offset *= mp.elements[dim];
    }
    unsigned int flags = 0;
    gpuErrchk(cudaMemcpyAsync(&flags, static_cast<char*>(mp.d_ptr) + flag_offset, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
    gpuErrchk(cudaStreamSynchronize(stream));
    const unsigned int write_bit = 1u << 1;
    return (flags & write_bit) != 0;
}
/**
 * Fetch the raw seatbelts flag word of a macro property from the device.
 * The copy is issued on the stream member and this call blocks until it has completed.
 * getDeviceReadFlag() tests bit 0 of this word and getDeviceWriteFlag() tests bit 1.
 * @param property_name Name of the macro property
 * @return The unsigned int stored directly after the property's data
 * @throws flamegpu::exception::InvalidEnvProperty If no macro property has the given name
 */
unsigned int CUDAMacroEnvironment::getDeviceRWFlags(const std::string& property_name) {
    const auto found = properties.find(property_name);
    if (found == properties.end()) {
        THROW flamegpu::exception::InvalidEnvProperty("The environment macro property '%s' was not found, "
            "in CUDAMacroEnvironment::getDeviceRWFlags()\n",
            property_name.c_str());
    }
    const auto& mp = found->second;
    // The flag word sits directly after the property's data
    size_t flag_offset = mp.type_size;
    for (unsigned int dim = 0; dim < 4; ++dim) {
        flag_offset *= mp.elements[dim];
    }
    unsigned int flags = 0;
    gpuErrchk(cudaMemcpyAsync(&flags, static_cast<char*>(mp.d_ptr) + flag_offset, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
    gpuErrchk(cudaStreamSynchronize(stream));
    return flags;
}
#endif
}  // namespace detail
}  // namespace flamegpu