.. _program_listing_file_src_flamegpu_simulation_detail_CUDAFatAgentStateList.cu: Program Listing for File CUDAFatAgentStateList.cu ================================================= |exhale_lsh| :ref:`Return to documentation for file ` (``src/flamegpu/simulation/detail/CUDAFatAgentStateList.cu``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "flamegpu/simulation/detail/CUDAFatAgentStateList.h" #include "flamegpu/simulation/detail/CUDAScatter.cuh" #include "flamegpu/detail/cuda.cuh" namespace flamegpu { namespace detail { CUDAFatAgentStateList::CUDAFatAgentStateList(const AgentData& description) : aliveAgents(0) , disabledAgents(0) , bufferLen(0) { // Initial statelist, must be from agent index 0 // State lists begin unallocated, allocated on first use for (const auto &v : description.variables) { AgentVariable variable = {0u, v.first}; variables.emplace(variable, std::make_shared(v.second.type, v.second.type_size, v.second.default_value, v.second.elements)); } // All initial variables are unique for (const auto &s : variables) variables_unique.push_back(s.second); } CUDAFatAgentStateList::CUDAFatAgentStateList(const CUDAFatAgentStateList& other) : aliveAgents(other.aliveAgents) , disabledAgents(other.disabledAgents) , bufferLen(0) { assert(other.bufferLen == 0); std::unordered_map> var_map; // Copy all unique variables, create a temporary map of old unique var to new unique var for (const auto &v : other.variables_unique) { assert(v->data == nullptr); // Ensure that copy constructor is used auto t_var = std::make_shared(*v.get()); variables_unique.push_back(t_var); var_map.emplace(v.get(), t_var); } // Using var map, solve variable pairings for (const auto &v : other.variables) { variables.emplace(v.first, var_map.at(v.second.get())); } } CUDAFatAgentStateList::~CUDAFatAgentStateList() { for (const auto &buff : variables_unique) { gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data)); gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data_swap)); } } void CUDAFatAgentStateList::addSubAgentVariables( const AgentData &description, const unsigned int master_fat_index, const unsigned int sub_fat_index, const std::shared_ptr &mapping) { for (const auto &v : description.variables) { const auto &mapped = mapping->variables.find(v.first); AgentVariable sub_var = {sub_fat_index, v.first}; if (mapped != mapping->variables.end()) { // Variable is mapped, so use existing variable AgentVariable master_var = {master_fat_index, mapped->second}; variables.emplace(sub_var, variables.at(master_var)); } else { // Variable is not mapped, so create new variable auto t_buff = std::make_shared(v.second.type, v.second.type_size, v.second.default_value, v.second.elements); variables.emplace(sub_var, t_buff); variables_unique.push_back(t_buff); } } } std::shared_ptr CUDAFatAgentStateList::getVariableBuffer(const unsigned int fat_index, const std::string &name) { const AgentVariable variable = {fat_index, name}; return variables.at(variable); } void CUDAFatAgentStateList::resize(const unsigned int minSize, const bool retainData, const cudaStream_t stream) { // If already big enough return if (minSize <= bufferLen) return; // else, decide new size unsigned int newSize = bufferLen > 1024 ? bufferLen : 1024; while (newSize < minSize) newSize = static_cast(newSize * 1.25f); // Resize all buffers in fat state list for (auto &buff : variables_unique) { const size_t var_size = buff->type_size * buff->elements; const size_t buff_size = var_size * newSize; // Free old swap buffer gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data_swap)); // Allocate new buffer to swap gpuErrchk(cudaMalloc(&buff->data_swap, buff_size)); // Copy old data to new buffer in swap if (retainData && buff->data) { const size_t active_len = aliveAgents * var_size; // const size_t inactive_len = (newSize - aliveAgents) * var_size; // Copy across old data (TODO: We could improve this by doing a scatter for all variables at once) gpuErrchk(cudaMemcpyAsync(buff->data_swap, buff->data, active_len, cudaMemcpyDeviceToDevice, stream)); // Zero remaining new data (This will be overwritten before use, so redundant) // gpuErrchk(cudaMemsetAsync(reinterpret_cast(buff->data_swap) + active_len, 0, inactive_len, stream)); } else { // Zero remaining new data (This will be overwritten before use, so redundant) // gpuErrchk(cudaMemsetAsync(buff->data_swap, 0, buff_size, stream)); } } if (retainData) { // Ensure copies have finished, before we free the buffers! gpuErrchk(cudaStreamSynchronize(stream)); } for (auto& buff : variables_unique) { const size_t var_size = buff->type_size * buff->elements; const size_t buff_size = var_size * newSize; // Swap buffers std::swap(buff->data_swap, buff->data); // Free old swap buffer gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data_swap)); // Allocate new buffer to swap gpuErrchk(cudaMalloc(&buff->data_swap, buff_size)); // Update condition list assert(disabledAgents == 0); buff->data_condition = buff->data; } // Update buffer len bufferLen = newSize; // Clear count if (!retainData) { aliveAgents = 0; disabledAgents = 0; } } unsigned int CUDAFatAgentStateList::getSize() const { return aliveAgents - disabledAgents; } unsigned int CUDAFatAgentStateList::getSizeWithDisabled() const { return aliveAgents; } unsigned int CUDAFatAgentStateList::getAllocatedSize() const { return bufferLen; } void CUDAFatAgentStateList::setAgentCount(const unsigned int newCount, const bool resetDisabled) { if ((resetDisabled && newCount > bufferLen) || (!resetDisabled && (newCount + disabledAgents> bufferLen))) { THROW exception::InvalidMemoryCapacity("Agent count will exceed allocated buffer size, " "in CUDAFatAgentStateList::setAgentCount()\n"); } if (resetDisabled) { disabledAgents = 0; } aliveAgents = disabledAgents + newCount; } unsigned int CUDAFatAgentStateList::scatterDeath(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) { // Build scatter data std::vector sd; for (const auto &v : variables_unique) { char *in_p = reinterpret_cast(v->data); char *out_p = reinterpret_cast(v->data_swap); sd.push_back({ v->type_size * v->elements, in_p, out_p }); // Pre swap stored pointers std::swap(v->data, v->data_swap); // Pre update data_condition v->data_condition = out_p + (disabledAgents * v->type_size * v->elements); } // Perform scatter const unsigned int living_agents = scatter.scatter( streamId, stream, CUDAScatter::Type::AGENT_DEATH, sd, aliveAgents, 0, false, disabledAgents); // Update size assert(living_agents <= bufferLen); aliveAgents = living_agents; return living_agents; } unsigned int CUDAFatAgentStateList::scatterAgentFunctionConditionFalse(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) { // This makes no sense if we have disabled agents (it's supposed to reorder to create disabled agents) assert(disabledAgents == 0); // Build scatter data std::vector sd; for (const auto &v : variables_unique) { char *in_p = reinterpret_cast(v->data); char *out_p = reinterpret_cast(v->data_swap); sd.push_back({ v->type_size * v->elements, in_p, out_p }); } // Perform scatter const unsigned int scattered_agents = scatter.scatter(streamId, stream, CUDAScatter::Type::AGENT_DEATH, sd, aliveAgents, 0, false, disabledAgents); return scattered_agents; } unsigned int CUDAFatAgentStateList::scatterAgentFunctionConditionTrue(const unsigned int conditionFailCount, detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) { // This makes no sense if we have disabled agents (it's suppose to reorder to create disabled agents) assert(disabledAgents == 0); // Build scatter data std::vector sd; for (const auto &v : variables_unique) { char *in_p = reinterpret_cast(v->data); char *out_p = reinterpret_cast(v->data_swap); sd.push_back({ v->type_size * v->elements, in_p, out_p }); // Pre swap stored pointers std::swap(v->data, v->data_swap); // Pre update data_condition v->data_condition = out_p + (conditionFailCount * v->type_size * v->elements); } // Perform scatter const unsigned int scattered_agents = scatter.scatter(streamId, stream, CUDAScatter::Type::AGENT_DEATH, sd, aliveAgents, conditionFailCount, true, disabledAgents); // Update disabled agents count disabledAgents = conditionFailCount; return scattered_agents; } void CUDAFatAgentStateList::setDisabledAgents(const unsigned int numberOfDisabled) { assert(numberOfDisabled <= aliveAgents); disabledAgents = numberOfDisabled; // update data_condition for each unique variable for (const auto &v : variables_unique) { char *data_p = reinterpret_cast(v->data); v->data_condition = data_p + (numberOfDisabled * v->type_size * v->elements); } } void CUDAFatAgentStateList::scatterSort_async(detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream) { // This is not designed to run when there are disabled agents assert(disabledAgents == 0); // Build scatter data std::vector sd; for (const auto &v : variables_unique) { char *in_p = reinterpret_cast(v->data); char *out_p = reinterpret_cast(v->data_swap); sd.push_back({ v->type_size * v->elements, in_p, out_p }); // Pre swap stored pointers std::swap(v->data, v->data_swap); // Pre update data_condition v->data_condition = out_p; } scatter.scatterPosition_async(streamId, stream, CUDAScatter::Type::MESSAGE_OUTPUT, sd, aliveAgents); } void CUDAFatAgentStateList::initVariables(std::set> &exclusionSet, const unsigned int initCount, const unsigned initOffset, detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) { if (initCount && exclusionSet.size()) { assert(initCount + initOffset <= bufferLen); std::list> initVars; // Build list of init vars (to save repeating this process), and calculate memory requirements for (const auto &v : variables_unique) { if (exclusionSet.find(v) == exclusionSet.end()) { initVars.push_back(v); } } // Perform scatter scatter.broadcastInit(streamId, stream, initVars, initCount, initOffset); } } std::list> &CUDAFatAgentStateList::getUniqueVariables() { return variables_unique; } void CUDAFatAgentStateList::swap(CUDAFatAgentStateList*other) { std::swap(aliveAgents, other->aliveAgents); std::swap(disabledAgents, other->disabledAgents); std::swap(bufferLen, other->bufferLen); for (auto a = variables_unique.begin(), b=other->variables_unique.begin(); a != variables_unique.end() && b != other->variables_unique.end(); ++a, ++b) { (*a)->swap(b->get()); } } std::list> CUDAFatAgentStateList::getBuffers(std::set>& exclusionSet) { std::list> returnVars; for (const auto& v : variables_unique) { if (exclusionSet.find(v) == exclusionSet.end()) { returnVars.push_back(v); } } return returnVars; } } // namespace detail } // namespace flamegpu