.. _program_listing_file_src_flamegpu_simulation_detail_CUDAMessage.cu:

Program Listing for File CUDAMessage.cu
=======================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_flamegpu_simulation_detail_CUDAMessage.cu>` (``src/flamegpu/simulation/detail/CUDAMessage.cu``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #include <cuda_runtime.h>
   #include <device_launch_parameters.h>
   
   #include <string>
   #include <memory>
   #include <algorithm>
   
   #include "flamegpu/simulation/detail/CUDAMessage.h"
   #include "flamegpu/simulation/detail/CUDAAgent.h"
   #include "flamegpu/simulation/detail/CUDAMessageList.h"
   #include "flamegpu/simulation/detail/CUDAErrorChecking.cuh"
   #include "flamegpu/simulation/detail/CUDAScatter.cuh"
   
   #include "flamegpu/runtime/messaging/MessageBruteForce.h"
   #include "flamegpu/model/AgentFunctionDescription.h"
   #include "flamegpu/runtime/detail/curve/HostCurve.cuh"
   #include "flamegpu/model/AgentDescription.h"
   #include "flamegpu/runtime/messaging.h"
   
   #ifdef _MSC_VER
   #pragma warning(push, 1)
   #pragma warning(disable : 4706 4834)
   #endif  // _MSC_VER
   #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
   #pragma nv_diag_suppress 1719
   #else
   #pragma diag_suppress 1719
   #endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
   #include <cub/cub.cuh>
   #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
   #pragma nv_diag_default 1719
   #else
   #pragma diag_default 1719
   #endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
   #ifdef _MSC_VER
   #pragma warning(pop)
   #endif  // _MSC_VER
   
   namespace flamegpu {
   namespace detail {
   
   CUDAMessage::CUDAMessage(const MessageBruteForce::Data& description, const CUDASimulation& cudaSimulation)
       : message_description(description)
       , message_count(0)
       , max_list_size(0)
       , truncate_messagelist_flag(true)
       , pbm_construction_required(false)
       , specialisation_handler(description.getSpecialisationHander(*this))
       , cudaSimulation(cudaSimulation) {
       // resize(0); // Think this call is redundant
   }
   
   CUDAMessage::~CUDAMessage(void) {
       // @todo - this should not be done in a destructor, rather an explicit cleanup method.
       specialisation_handler->freeMetaDataDevicePtr();
   }
   
   const MessageBruteForce::Data& CUDAMessage::getMessageData() const {
       return message_description;
   }
   
   void CUDAMessage::resize(unsigned int newSize, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId, unsigned int keepLen) {
       // Only grow currently
       if (newSize > max_list_size) {
           const unsigned int _keep_len = std::min(max_list_size, keepLen);
           max_list_size = std::max<unsigned int>(max_list_size, 2u);
           while (max_list_size < newSize) {
               max_list_size = static_cast<unsigned int>(max_list_size * 1.5);
           }
           if (message_list) {
               message_list->resize(scatter, stream, streamId, _keep_len);
           } else {
               // If the list has not already been allocated, create a new
               message_list = std::unique_ptr<CUDAMessageList>(new CUDAMessageList(*this, scatter, stream, streamId));
           }
           scatter.Scan().resize(max_list_size, CUDAScanCompaction::MESSAGE_OUTPUT, streamId);
       }
   }
   
   
   unsigned int CUDAMessage::getMaximumListSize() const {
       return max_list_size;
   }
   unsigned int CUDAMessage::getMessageCount() const {
       return message_count;
   }
   void CUDAMessage::setMessageCount(const unsigned int _message_count) {
       if (_message_count > max_list_size) {
           THROW exception::OutOfBoundsException("message count exceeds allocated message list size (%u > %u) in CUDAMessage::setMessageCount().", _message_count, max_list_size);
       }
       message_count = _message_count;
   }
   void CUDAMessage::init(detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream) {
       specialisation_handler->init(scatter, streamId, stream);
   }
   void CUDAMessage::zeroAllMessageData(cudaStream_t stream) {
       if (!message_list) {
           THROW exception::InvalidMessageData("MessageList '%s' is not yet allocated, in CUDAMessage::swap()\n", message_description.name.c_str());
       }
       message_list->zeroMessageData(stream);
   }
   
   void CUDAMessage::mapReadRuntimeVariables(const AgentFunctionData& func, const CUDAAgent& cuda_agent) const {
       // check that the message list has been allocated
       if (!message_list) {
           if (getMessageCount() == 0) {
               return;  // Message list is empty, this should be safe
           }
           THROW exception::InvalidMessageData("Error: Initial message list for message '%s' has not been allocated, "
               "in CUDAMessage::mapRuntimeVariables()",
               message_description.name.c_str());
       }
   
       // loop through the message variables to map each variable name using cuRVE
       for (const auto &mmp : message_description.variables) {
           // get a device pointer for the message variable name
           void* d_ptr = message_list->getReadMessageListVariablePointer(mmp.first);
   
           if (func.func) {
               auto& curve = cuda_agent.getCurve(func.name);  // @todo fix the heavy map lookups
               // maximum population size
               unsigned int length = this->getMessageCount();  // check to see if it is equal to pop
               curve.setMessageInputVariable(mmp.first, d_ptr, length);
           } else {
               // Map RTC variables (these must be mapped before each function execution as the runtime pointer may have changed to the swapping)
               // Copy data to rtc header cache
               auto &rtc_header = cuda_agent.getRTCHeader(func.name);
               memcpy(rtc_header.getMessageInVariableCachePtr(mmp.first.c_str()), &d_ptr, sizeof(void*));
               rtc_header.setMessageInVariableCount(mmp.first, this->getMessageCount());
           }
       }
   }
   
   void *CUDAMessage::getReadPtr(const std::string &var_name) {
       if (!message_list) {
           THROW exception::InvalidMessageData("MessageList '%s' is not yet allocated, in CUDAMessage::swap()\n", message_description.name.c_str());
       }
       return message_list->getReadMessageListVariablePointer(var_name);
   }
   void CUDAMessage::mapWriteRuntimeVariables(const AgentFunctionData& func, const CUDAAgent& cuda_agent, const unsigned int writeLen, cudaStream_t stream) const {
       // check that the message list has been allocated
       if (!message_list) {
           THROW exception::InvalidMessageData("Error: Initial message list for message '%s' has not been allocated, "
               "in CUDAMessage::mapRuntimeVariables()",
               message_description.name.c_str());
       }
   
       // loop through the message variables to map each variable name using cuRVE
       for (const auto &mmp : message_description.variables) {
           // get a device pointer for the message variable name
           void* d_ptr = message_list->getWriteMessageListVariablePointer(mmp.first);
   
           if (func.func) {
               auto &curve = cuda_agent.getCurve(func.name);  // @todo fix the heavy map lookups
               // maximum population size
               unsigned int length = writeLen;  // check to see if it is equal to pop
               curve.setMessageOutputVariable(mmp.first, d_ptr, length);
           } else {
               // Map RTC variables (these must be mapped before each function execution as the runtime pointer may have changed to the swapping)
               // Copy data to rtc header cache
               auto& rtc_header = cuda_agent.getRTCHeader(func.name);
               memcpy(rtc_header.getMessageOutVariableCachePtr(mmp.first.c_str()), &d_ptr, sizeof(void*));
               rtc_header.setMessageOutVariableCount(mmp.first, this->getMessageCount());
           }
       }
   
       // Allocate the metadata if required. (This call should now be redundant)
       specialisation_handler->allocateMetaDataDevicePtr(stream);
   }
   
   void CUDAMessage::swap(bool isOptional, unsigned int newMessageCount, detail::CUDAScatter &scatter, cudaStream_t stream, unsigned int streamId) {
       if (!message_list) {
           THROW exception::InvalidMessageData("MessageList '%s' is not yet allocated, in CUDAMessage::swap()\n", message_description.name.c_str());
       }
       if (isOptional && message_description.optional_outputs > 0) {
           auto &scanCfg = scatter.Scan().getConfig(CUDAScanCompaction::MESSAGE_OUTPUT, streamId);
           // Check if we need to resize cub storage
           auto& cub_temp = scatter.CubTemp(streamId);
           size_t tempByte = 0;
           gpuErrchk(cub::DeviceScan::ExclusiveSum(
               nullptr,
               tempByte,
               scanCfg.d_ptrs.scan_flag,
               scanCfg.d_ptrs.position,
               max_list_size + 1,
               stream));
           cub_temp.resize(tempByte);
           gpuErrchk(cub::DeviceScan::ExclusiveSum(
               cub_temp.getPtr(),
               cub_temp.getSize(),
               scanCfg.d_ptrs.scan_flag,
               scanCfg.d_ptrs.position,
               newMessageCount + 1,
               stream));
           // Scatter
           // Update count
           message_count = message_list->scatter(newMessageCount, scatter, stream, streamId, !this->truncate_messagelist_flag);
       } else {
           if (this->truncate_messagelist_flag) {
               message_count = newMessageCount;
               message_list->swap();
           } else {
               assert(message_count + newMessageCount <= max_list_size);
               // We're appending so use our scatter kernel
               message_count = message_list->scatterAll(newMessageCount, scatter, stream, streamId);
           }
       }
   }
   void CUDAMessage::swap() {
       if (!message_list) {
           THROW exception::InvalidMessageData("MessageList '%s' is not yet allocated, in CUDAMessage::swap()\n", message_description.name.c_str());
       }
       message_list->swap();
   }
   
   void CUDAMessage::buildIndex(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
       // Build the index if required.
       if (pbm_construction_required) {
           specialisation_handler->buildIndex(scatter, streamId, stream);
           pbm_construction_required = false;
       }
   }
   const void *CUDAMessage::getMetaDataDevicePtr() const {
       return specialisation_handler->getMetaDataDevicePtr();
   }
   
   }  // namespace detail
   }  // namespace flamegpu