.. _program_listing_file_src_flamegpu_simulation_detail_CUDAMessageList.cu: Program Listing for File CUDAMessageList.cu =========================================== |exhale_lsh| :ref:`Return to documentation for file ` (``src/flamegpu/simulation/detail/CUDAMessageList.cu``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include #include #include "flamegpu/simulation/detail/CUDAMessageList.h" #include "flamegpu/simulation/detail/CUDAMessage.h" #include "flamegpu/simulation/detail/CUDAErrorChecking.cuh" #include "flamegpu/runtime/messaging/MessageBruteForce/MessageBruteForceHost.h" #include "flamegpu/simulation/detail/CUDAScatter.cuh" #include "flamegpu/detail/cuda.cuh" namespace flamegpu { namespace detail { CUDAMessageList::CUDAMessageList(CUDAMessage& cuda_message, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId) : message(cuda_message) { // allocate message lists allocateDeviceMessageList(d_list); allocateDeviceMessageList(d_swap_list); zeroDeviceMessageList_async(d_list, stream); zeroDeviceMessageList_async(d_swap_list, stream); gpuErrchk(cudaStreamSynchronize(stream)); } CUDAMessageList::~CUDAMessageList() { cleanupAllocatedData(); } void CUDAMessageList::cleanupAllocatedData() { // clean up releaseDeviceMessageList(d_list); releaseDeviceMessageList(d_swap_list); } void CUDAMessageList::allocateDeviceMessageList(CUDAMessageMap &memory_map) { // we use the messages memory map to iterate the message variables and do allocation within our GPU hash map const auto &mem = message.getMessageData().variables; // for each variable allocate a device array and add to map for (const auto &mm : mem) { // get the variable name std::string var_name = mm.first; // get the variable size from message description size_t var_size = mm.second.type_size * mm.second.elements; // do the device allocation void * d_ptr; #ifdef UNIFIED_GPU_MEMORY // unified memory allocation gpuErrchk(cudaMallocManaged(reinterpret_cast(&d_ptr), var_size * message.getMaximumListSize())) #else // non unified memory allocation gpuErrchk(cudaMalloc(reinterpret_cast(&d_ptr), var_size * message.getMaximumListSize())); #endif // store the pointer in the map memory_map.insert(CUDAMessageMap::value_type(var_name, d_ptr)); } } void CUDAMessageList::resize(CUDAScatter& scatter, cudaStream_t stream, unsigned int streamId, unsigned int keep_len) { // Release d_swap_list, we don't retain this data releaseDeviceMessageList(d_swap_list); // Allocate the new d_list CUDAMessageMap d_list_old; std::swap(d_list, d_list_old); allocateDeviceMessageList(d_list); if (keep_len && keep_len <= message.getMessageCount()) { // Copy data from d_list_old to d_list // Note, if keep_len exceeds length of d_swap_list_old, this will crash scatter.scatterAll(streamId, stream, message.getMessageData().variables, d_list_old, d_list, keep_len, 0); } // Release d_list_old releaseDeviceMessageList(d_list_old); // Allocate the new d_swap_list allocateDeviceMessageList(d_swap_list); // Zero any new buffers with undefined data zeroDeviceMessageList_async(d_list, stream, keep_len); zeroDeviceMessageList_async(d_swap_list, stream); gpuErrchk(cudaStreamSynchronize(stream)); } void CUDAMessageList::releaseDeviceMessageList(CUDAMessageMap& memory_map) { // for each device pointer in the cuda memory map we need to free these for (const auto &mm : memory_map) { // free the memory on the device gpuErrchk(flamegpu::detail::cuda::cudaFree(mm.second)); } memory_map.clear(); } void CUDAMessageList::zeroDeviceMessageList_async(CUDAMessageMap& memory_map, cudaStream_t stream, unsigned int skip_offset) { if (skip_offset >= message.getMaximumListSize()) return; // for each device pointer in the cuda memory map set the values to 0 for (const auto &mm : memory_map) { // get the variable size from message description const auto var = message.getMessageData().variables.at(mm.first); const size_t var_size = var.type_size * var.elements; // set the memory to zero gpuErrchk(cudaMemsetAsync(static_cast(mm.second) + (var_size * skip_offset), 0, var_size * (message.getMaximumListSize() - skip_offset), stream)); } } void* CUDAMessageList::getReadMessageListVariablePointer(std::string variable_name) { CUDAMessageMap::iterator mm = d_list.find(variable_name); if (mm == d_list.end()) { THROW exception::InvalidMessageVar("Variable '%s' was not found in message '%s', " "in CUDAMessageList::getReadMessageListVariablePointer()", variable_name.c_str(), message.getMessageData().name.c_str()); } return mm->second; } void* CUDAMessageList::getWriteMessageListVariablePointer(std::string variable_name) { CUDAMessageMap::iterator mm = d_swap_list.find(variable_name); if (mm == d_swap_list.end()) { THROW exception::InvalidMessageVar("Variable '%s' was not found in message '%s', " "in CUDAMessageList::getWriteMessageListVariablePointer()", variable_name.c_str(), message.getMessageData().name.c_str()); } return mm->second; } void CUDAMessageList::zeroMessageData(cudaStream_t stream) { zeroDeviceMessageList_async(d_list, stream); zeroDeviceMessageList_async(d_swap_list, stream); gpuErrchk(cudaStreamSynchronize(stream)); } void CUDAMessageList::swap() { std::swap(d_list, d_swap_list); } unsigned int CUDAMessageList::scatter(unsigned int newCount, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId, bool append) { if (append) { unsigned int oldCount = message.getMessageCount(); return oldCount + scatter.scatter(streamId, stream, CUDAScatter::Type::MESSAGE_OUTPUT, message.getMessageData().variables, d_swap_list, d_list, newCount, oldCount); } else { return scatter.scatter(streamId, stream, CUDAScatter::Type::MESSAGE_OUTPUT, message.getMessageData().variables, d_swap_list, d_list, newCount, 0); } } unsigned int CUDAMessageList::scatterAll(unsigned int newCount, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId) { unsigned int oldCount = message.getMessageCount(); return oldCount + scatter.scatterAll(streamId, stream, message.getMessageData().variables, d_swap_list, d_list, newCount, oldCount); } } // namespace detail } // namespace flamegpu