.. _program_listing_file_src_flamegpu_simulation_detail_CUDAAgentStateList.cu: Program Listing for File CUDAAgentStateList.cu ============================================== |exhale_lsh| :ref:`Return to documentation for file ` (``src/flamegpu/simulation/detail/CUDAAgentStateList.cu``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "flamegpu/simulation/detail/CUDAAgentStateList.h" #include #include #include "flamegpu/simulation/detail/CUDAAgent.h" #include "flamegpu/simulation/detail/CUDAErrorChecking.cuh" #include "flamegpu/simulation/AgentVector.h" #include "flamegpu/model/AgentDescription.h" #include "flamegpu/simulation/detail/CUDAScatter.cuh" #include "flamegpu/runtime/agent/HostNewAgentAPI.h" #include "flamegpu/exception/FLAMEGPUException.h" #ifdef _MSC_VER #pragma warning(push, 1) #pragma warning(disable : 4706 4834) #endif // _MSC_VER #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ #pragma nv_diag_suppress 1719 #else #pragma diag_suppress 1719 #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ #include #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ #pragma nv_diag_default 1719 #else #pragma diag_default 1719 #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ #ifdef _MSC_VER #pragma warning(pop) #endif // _MSC_VER namespace flamegpu { namespace detail { CUDAAgentStateList::CUDAAgentStateList( const std::shared_ptr &fat_list, CUDAAgent& cuda_agent, const unsigned int _fat_index, const AgentData& description, bool _isSubStateList) : fat_index(_fat_index) , agent(cuda_agent) , parent_list(fat_list) , isSubStateList(_isSubStateList) { // For each agent variable, take a copy of the shared pointer, store it for (auto var : description.variables) { variables.emplace(var.first, fat_list->getVariableBuffer(fat_index, var.first)); } } CUDAAgentStateList::CUDAAgentStateList( const std::shared_ptr &fat_list, CUDAAgent& cuda_agent, const unsigned int _fat_index, const AgentData& description, bool _isSubStateList, const SubAgentData::Mapping &varMap) : CUDAAgentStateList(fat_list, cuda_agent, _fat_index, description, _isSubStateList) { // Build a list of variables not present in the mapping // These are not mapped to parent agent, therefore they must be reset when CUDASimulation::simulate() is called for (auto var : variables) { if (varMap.find(var.first)== varMap.end()) { unmappedBuffers.push_back(var.second); } } } void CUDAAgentStateList::resize(const unsigned int minimumSize, const bool retainData, const cudaStream_t stream) { parent_list->resize(minimumSize, retainData, stream); } unsigned int CUDAAgentStateList::getSize() const { return parent_list->getSize(); } unsigned int CUDAAgentStateList::getAllocatedSize() const { return parent_list->getAllocatedSize(); } void *CUDAAgentStateList::getVariablePointer(const std::string &variable_name) { // check the cuda agent state map to find the correct state list for functions starting state auto var = variables.find(variable_name); if (var == variables.end()) { THROW exception::InvalidAgentVar("Error: Agent ('%s') variable ('%s') was not found " "in CUDAAgentStateList::getVariablePointer()", agent.getAgentDescription().getName().c_str(), variable_name.c_str()); } return var->second->data_condition; } void CUDAAgentStateList::setAgentData(const AgentVector& population, CUDAScatter& scatter, const unsigned int streamId, const cudaStream_t stream) { // Validate AgentData matches if (!population.matchesAgentType(agent.getAgentDescription())) { THROW exception::InvalidCudaAgentDesc("Agent description for agent '%s' does not match that of AgentVector, " "in CUDAAgentStateList::setAgentData()", population.getAgentName().c_str()); } // Check our internal state matches or exceeds the size of the state in the agent pop // This will return if list already correct size const unsigned int data_count = population.size(); if (data_count) { parent_list->resize(data_count, false, stream); // FALSE=Do not retain existing data // Initialise any buffers in the fat_agent which aren't part of the agent description std::set> exclusionSet; for (auto& a : variables) exclusionSet.insert(a.second); parent_list->initVariables(exclusionSet, data_count, 0, scatter, streamId, stream); // Copy across the required data host->device for (auto& _var : variables) { // get the variable size from agent description const CAgentDescription agent_desc = agent.getAgentDescription(); const size_t var_size = agent_desc.getVariableSize(_var.first); const unsigned int var_elements = agent_desc.getVariableLength(_var.first); // get pointer to vector data const void* v_data = population.data(_var.first); // copy the host data to the GPU gpuErrchk(cudaMemcpyAsync(_var.second->data, v_data, var_elements * var_size * data_count, cudaMemcpyHostToDevice, stream)); gpuErrchk(cudaStreamSynchronize(stream)); } } // Update alive count etc parent_list->setAgentCount(data_count); } void CUDAAgentStateList::getAgentData(AgentVector& population) const { // Validate AgentData matches if (!population.matchesAgentType(agent.getAgentDescription())) { THROW exception::InvalidCudaAgentDesc("Agent description for agent '%s' does not match that of AgentVector, " "in CUDAAgentStateList::setAgentData()", population.getAgentName().c_str()); } const unsigned int data_count = getSize(); if (data_count) { population.internal_resize(data_count, false); // Copy across the required data device->host for (auto& _var : variables) { const CAgentDescription agent_desc = agent.getAgentDescription(); const size_t var_size = agent_desc.getVariableSize(_var.first); const unsigned int var_elements = agent_desc.getVariableLength(_var.first); // get pointer to vector data // Use the const method, but const cast away the const to avoid the reserved var check void* v_data = const_cast(static_cast(population).data(_var.first)); // copy the host data to the GPU gpuErrchk(cudaMemcpy(v_data, _var.second->data, var_elements * var_size * data_count, cudaMemcpyDeviceToHost)); } } population._size = data_count; // Private AgentVector::resize() does not update size } void CUDAAgentStateList::scatterHostCreation(unsigned int newSize, char* const d_inBuff, const VarOffsetStruct & offsets, detail::CUDAScatter & scatter, const unsigned int streamId, const cudaStream_t stream) { // Resize agent list if required parent_list->resize(parent_list->getSizeWithDisabled() + newSize, true, stream); // Build scatter data std::vector sd; for (const auto &v : variables) { // In this case, in is the location of first variable, but we step by inOffsetData.totalSize char *in_p = reinterpret_cast(d_inBuff) + offsets.vars.at(v.first).offset; char *out_p = reinterpret_cast(v.second->data); sd.push_back({ v.second->type_size * v.second->elements, in_p, out_p }); } // Scatter to device scatter.scatterNewAgents(streamId, stream, sd, offsets.totalSize, newSize, parent_list->getSize()); // Initialise any buffers in the fat_agent which aren't part of the current agent description // TODO: This does redundant inits, it only needs to initialise parent/master agent variables which are not mapped // Sub variables will already be init everytime the submodel is executed. std::set> exclusionSet; for (auto &a : variables) exclusionSet.insert(a.second); parent_list->initVariables(exclusionSet, newSize, parent_list->getSize(), scatter, streamId, stream); // Update number of alive agents parent_list->setAgentCount(parent_list->getSize() + newSize); } void CUDAAgentStateList::scatterSort_async(detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream) { parent_list->scatterSort_async(scatter, streamId, stream); } unsigned int CUDAAgentStateList::scatterNew(void * d_newBuff, const unsigned int newSize, detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) { if (newSize) { CUDAScanCompactionConfig &scanCfg = scatter.Scan().Config(CUDAScanCompaction::Type::AGENT_OUTPUT, streamId); // Check if we need to resize cub storage auto& cub_temp = scatter.CubTemp(streamId); size_t tempByte = 0; gpuErrchk(cub::DeviceScan::ExclusiveSum( nullptr, tempByte, scanCfg.d_ptrs.scan_flag, scanCfg.d_ptrs.position, newSize + 1, stream)); cub_temp.resize(tempByte); // Perform scan gpuErrchk(cub::DeviceScan::ExclusiveSum( cub_temp.getPtr(), cub_temp.getSize(), scanCfg.d_ptrs.scan_flag, scanCfg.d_ptrs.position, newSize + 1, stream)); gpuErrchk(cudaStreamSynchronize(stream)); // Resize if necessary // @todo? this could be improved by checking scan result for the actual size, rather than max size) resize(parent_list->getSizeWithDisabled() + newSize, true, stream); // Build scatter data char * d_var = static_cast(d_newBuff); std::vector scatterdata; for (const auto &v : variables) { char *in_p = reinterpret_cast(d_var); char *out_p = reinterpret_cast(v.second->data_condition); scatterdata.push_back({ v.second->type_size * v.second->elements, in_p, out_p }); // Prep pointer for next var d_var += v.second->type_size * v.second->elements * newSize; // 64 bit align the new buffer start if (reinterpret_cast(d_var)%8) { d_var += 8 - (reinterpret_cast(d_var)%8); } } // Perform scatter const unsigned int new_births = scatter.scatter( streamId, stream, CUDAScatter::Type::AGENT_OUTPUT, scatterdata, newSize, parent_list->getSizeWithDisabled()); if (new_births == 0) return 0; // Initialise any buffers in the fat_agent which aren't part of the current agent description // TODO: This does redundant inits, it only needs to initialise parent/master agent variables which are not mapped // Sub variables will already be init everytime the submodel is executed. std::set> exclusionSet; for (auto &a : variables) exclusionSet.insert(a.second); parent_list->initVariables(exclusionSet, newSize, parent_list->getSize(), scatter, streamId, stream); // Update number of alive agents parent_list->setAgentCount(parent_list->getSize() + new_births); return new_births; } return 0; } bool CUDAAgentStateList::getIsSubStatelist() { return isSubStateList; } void CUDAAgentStateList::initUnmappedVars(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) { assert(parent_list->getSizeWithDisabled() == parent_list->getSize()); if (parent_list->getSize()) { assert(isSubStateList); // If unmappedBuffers is not empty, perform broadcast init if (unmappedBuffers.size()) { scatter.broadcastInit(streamId, stream, unmappedBuffers, parent_list->getSize(), 0); } } } void CUDAAgentStateList::initExcludedVars(const unsigned int count, const unsigned int offset, CUDAScatter& scatter, const unsigned int streamId, const cudaStream_t stream) { std::set> exclusionSet; for (auto& a : variables) exclusionSet.insert(a.second); parent_list->initVariables(exclusionSet, count, offset, scatter, streamId, stream); } void CUDAAgentStateList::clear() { parent_list->setAgentCount(0, true); } void CUDAAgentStateList::setAgentCount(const unsigned int newSize) { parent_list->setAgentCount(newSize, false); } std::list> CUDAAgentStateList::getUnboundVariableBuffers() { std::set> exclusionSet; for (auto& a : variables) exclusionSet.insert(a.second); return parent_list->getBuffers(exclusionSet); } } // namespace detail } // namespace flamegpu