Program Listing for File CUDAMessageList.cu

Return to documentation for file (src/flamegpu/simulation/detail/CUDAMessageList.cu)

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <string>
#include <utility>

#include "flamegpu/simulation/detail/CUDAMessageList.h"
#include "flamegpu/simulation/detail/CUDAMessage.h"
#include "flamegpu/simulation/detail/CUDAErrorChecking.cuh"
#include "flamegpu/runtime/messaging/MessageBruteForce/MessageBruteForceHost.h"
#include "flamegpu/simulation/detail/CUDAScatter.cuh"
#include "flamegpu/detail/cuda.cuh"

namespace flamegpu {
namespace detail {

CUDAMessageList::CUDAMessageList(CUDAMessage& cuda_message, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId)
    : message(cuda_message) {
    // allocate message lists
    allocateDeviceMessageList(d_list);
    allocateDeviceMessageList(d_swap_list);
    zeroDeviceMessageList_async(d_list, stream);
    zeroDeviceMessageList_async(d_swap_list, stream);
    gpuErrchk(cudaStreamSynchronize(stream));
}

CUDAMessageList::~CUDAMessageList() {
    cleanupAllocatedData();
}

void CUDAMessageList::cleanupAllocatedData() {
    // clean up
    releaseDeviceMessageList(d_list);
    releaseDeviceMessageList(d_swap_list);
}

void CUDAMessageList::allocateDeviceMessageList(CUDAMessageMap &memory_map) {
    // we use the  messages memory map to iterate the  message variables and do allocation within our GPU hash map
    const auto &mem = message.getMessageData().variables;

    // for each variable allocate a device array and add to map
    for (const auto &mm : mem) {
        // get the variable name
        std::string var_name = mm.first;

        // get the variable size from  message description
        size_t var_size = mm.second.type_size * mm.second.elements;

        // do the device allocation
        void * d_ptr;

#ifdef UNIFIED_GPU_MEMORY
        // unified memory allocation
        gpuErrchk(cudaMallocManaged(reinterpret_cast<void**>(&d_ptr), var_size *  message.getMaximumListSize()))
#else
        // non unified memory allocation
        gpuErrchk(cudaMalloc(reinterpret_cast<void**>(&d_ptr), var_size * message.getMaximumListSize()));
#endif

        // store the pointer in the map
        memory_map.insert(CUDAMessageMap::value_type(var_name, d_ptr));
    }
}
void CUDAMessageList::resize(CUDAScatter& scatter, cudaStream_t stream, unsigned int streamId, unsigned int keep_len) {
    // Release d_swap_list, we don't retain this data
    releaseDeviceMessageList(d_swap_list);
    // Allocate the new d_list
    CUDAMessageMap d_list_old;
    std::swap(d_list, d_list_old);
    allocateDeviceMessageList(d_list);
    if (keep_len && keep_len <= message.getMessageCount()) {
        // Copy data from d_list_old to d_list
        // Note, if keep_len exceeds length of d_swap_list_old, this will crash
        scatter.scatterAll(streamId,
            stream,
            message.getMessageData().variables,
            d_list_old, d_list,
            keep_len,
            0);
    }
    // Release d_list_old
    releaseDeviceMessageList(d_list_old);
    // Allocate the new d_swap_list
    allocateDeviceMessageList(d_swap_list);
    // Zero any new buffers with undefined data
    zeroDeviceMessageList_async(d_list, stream, keep_len);
    zeroDeviceMessageList_async(d_swap_list, stream);
    gpuErrchk(cudaStreamSynchronize(stream));
}

void CUDAMessageList::releaseDeviceMessageList(CUDAMessageMap& memory_map) {
    // for each device pointer in the cuda memory map we need to free these
    for (const auto &mm : memory_map) {
        // free the memory on the device
        gpuErrchk(flamegpu::detail::cuda::cudaFree(mm.second));
    }
    memory_map.clear();
}

void CUDAMessageList::zeroDeviceMessageList_async(CUDAMessageMap& memory_map, cudaStream_t stream, unsigned int skip_offset) {
    if (skip_offset >= message.getMaximumListSize())
        return;
    // for each device pointer in the cuda memory map set the values to 0
    for (const auto &mm : memory_map) {
        // get the variable size from message description
        const auto var = message.getMessageData().variables.at(mm.first);
        const size_t var_size = var.type_size * var.elements;

        // set the memory to zero
        gpuErrchk(cudaMemsetAsync(static_cast<char*>(mm.second) + (var_size * skip_offset), 0, var_size * (message.getMaximumListSize() - skip_offset), stream));
    }
}

void* CUDAMessageList::getReadMessageListVariablePointer(std::string variable_name) {
    CUDAMessageMap::iterator mm = d_list.find(variable_name);
    if (mm == d_list.end()) {
        THROW exception::InvalidMessageVar("Variable '%s' was not found in message '%s', "
          "in CUDAMessageList::getReadMessageListVariablePointer()",
          variable_name.c_str(), message.getMessageData().name.c_str());
    }

    return mm->second;
}
void* CUDAMessageList::getWriteMessageListVariablePointer(std::string variable_name) {
    CUDAMessageMap::iterator mm = d_swap_list.find(variable_name);
    if (mm == d_swap_list.end()) {
        THROW exception::InvalidMessageVar("Variable '%s' was not found in message '%s', "
            "in CUDAMessageList::getWriteMessageListVariablePointer()",
            variable_name.c_str(), message.getMessageData().name.c_str());
    }

    return mm->second;
}

void CUDAMessageList::zeroMessageData(cudaStream_t stream) {
    zeroDeviceMessageList_async(d_list, stream);
    zeroDeviceMessageList_async(d_swap_list, stream);
    gpuErrchk(cudaStreamSynchronize(stream));
}


void CUDAMessageList::swap() {
    std::swap(d_list, d_swap_list);
}

unsigned int CUDAMessageList::scatter(unsigned int newCount, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId, bool append) {
    if (append) {
        unsigned int oldCount = message.getMessageCount();
        return oldCount + scatter.scatter(streamId,
            stream,
            CUDAScatter::Type::MESSAGE_OUTPUT,
            message.getMessageData().variables,
            d_swap_list, d_list,
            newCount,
            oldCount);
    } else {
        return scatter.scatter(streamId,
            stream,
            CUDAScatter::Type::MESSAGE_OUTPUT,
            message.getMessageData().variables,
            d_swap_list, d_list,
            newCount,
            0);
    }
}
unsigned int CUDAMessageList::scatterAll(unsigned int newCount, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId) {
    unsigned int oldCount = message.getMessageCount();
    return oldCount + scatter.scatterAll(streamId,
        stream,
        message.getMessageData().variables,
        d_swap_list, d_list,
        newCount,
        oldCount);
}

}  // namespace detail
}  // namespace flamegpu