Program Listing for File CUDAAgentStateList.cu
↰ Return to documentation for file (src/flamegpu/simulation/detail/CUDAAgentStateList.cu
)
#include "flamegpu/simulation/detail/CUDAAgentStateList.h"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "flamegpu/simulation/detail/CUDAAgent.h"
#include "flamegpu/simulation/detail/CUDAErrorChecking.cuh"
#include "flamegpu/simulation/AgentVector.h"
#include "flamegpu/model/AgentDescription.h"
#include "flamegpu/simulation/detail/CUDAScatter.cuh"
#include "flamegpu/runtime/agent/HostNewAgentAPI.h"
#include "flamegpu/exception/FLAMEGPUException.h"
#ifdef _MSC_VER
#pragma warning(push, 1)
#pragma warning(disable : 4706 4834)
#endif // _MSC_VER
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_suppress 1719
#else
#pragma diag_suppress 1719
#endif // __NVCC_DIAG_PRAGMA_SUPPORT__
#include <cub/cub.cuh>
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_default 1719
#else
#pragma diag_default 1719
#endif // __NVCC_DIAG_PRAGMA_SUPPORT__
#ifdef _MSC_VER
#pragma warning(pop)
#endif // _MSC_VER
namespace flamegpu {
namespace detail {
CUDAAgentStateList::CUDAAgentStateList(
const std::shared_ptr<CUDAFatAgentStateList> &fat_list,
CUDAAgent& cuda_agent,
const unsigned int _fat_index,
const AgentData& description,
bool _isSubStateList)
: fat_index(_fat_index)
, agent(cuda_agent)
, parent_list(fat_list)
, isSubStateList(_isSubStateList) {
// For each agent variable, take a copy of the shared pointer, store it
for (auto var : description.variables) {
variables.emplace(var.first, fat_list->getVariableBuffer(fat_index, var.first));
}
}
CUDAAgentStateList::CUDAAgentStateList(
const std::shared_ptr<CUDAFatAgentStateList> &fat_list,
CUDAAgent& cuda_agent,
const unsigned int _fat_index,
const AgentData& description,
bool _isSubStateList,
const SubAgentData::Mapping &varMap)
: CUDAAgentStateList(fat_list, cuda_agent, _fat_index, description, _isSubStateList) {
// Build a list of variables not present in the mapping
// These are not mapped to parent agent, therefore they must be reset when CUDASimulation::simulate() is called
for (auto var : variables) {
if (varMap.find(var.first)== varMap.end()) {
unmappedBuffers.push_back(var.second);
}
}
}
void CUDAAgentStateList::resize(const unsigned int minimumSize, const bool retainData, const cudaStream_t stream) {
parent_list->resize(minimumSize, retainData, stream);
}
unsigned int CUDAAgentStateList::getSize() const {
return parent_list->getSize();
}
unsigned int CUDAAgentStateList::getAllocatedSize() const {
return parent_list->getAllocatedSize();
}
void *CUDAAgentStateList::getVariablePointer(const std::string &variable_name) {
// check the cuda agent state map to find the correct state list for functions starting state
auto var = variables.find(variable_name);
if (var == variables.end()) {
THROW exception::InvalidAgentVar("Error: Agent ('%s') variable ('%s') was not found "
"in CUDAAgentStateList::getVariablePointer()",
agent.getAgentDescription().getName().c_str(), variable_name.c_str());
}
return var->second->data_condition;
}
void CUDAAgentStateList::setAgentData(const AgentVector& population, CUDAScatter& scatter, const unsigned int streamId, const cudaStream_t stream) {
// Validate AgentData matches
if (!population.matchesAgentType(agent.getAgentDescription())) {
THROW exception::InvalidCudaAgentDesc("Agent description for agent '%s' does not match that of AgentVector, "
"in CUDAAgentStateList::setAgentData()",
population.getAgentName().c_str());
}
// Check our internal state matches or exceeds the size of the state in the agent pop
// This will return if list already correct size
const unsigned int data_count = population.size();
if (data_count) {
parent_list->resize(data_count, false, stream); // FALSE=Do not retain existing data
// Initialise any buffers in the fat_agent which aren't part of the agent description
std::set<std::shared_ptr<VariableBuffer>> exclusionSet;
for (auto& a : variables)
exclusionSet.insert(a.second);
parent_list->initVariables(exclusionSet, data_count, 0, scatter, streamId, stream);
// Copy across the required data host->device
for (auto& _var : variables) {
// get the variable size from agent description
const CAgentDescription agent_desc = agent.getAgentDescription();
const size_t var_size = agent_desc.getVariableSize(_var.first);
const unsigned int var_elements = agent_desc.getVariableLength(_var.first);
// get pointer to vector data
const void* v_data = population.data(_var.first);
// copy the host data to the GPU
gpuErrchk(cudaMemcpyAsync(_var.second->data, v_data, var_elements * var_size * data_count, cudaMemcpyHostToDevice, stream));
gpuErrchk(cudaStreamSynchronize(stream));
}
}
// Update alive count etc
parent_list->setAgentCount(data_count);
}
void CUDAAgentStateList::getAgentData(AgentVector& population) const {
// Validate AgentData matches
if (!population.matchesAgentType(agent.getAgentDescription())) {
THROW exception::InvalidCudaAgentDesc("Agent description for agent '%s' does not match that of AgentVector, "
"in CUDAAgentStateList::setAgentData()",
population.getAgentName().c_str());
}
const unsigned int data_count = getSize();
if (data_count) {
population.internal_resize(data_count, false);
// Copy across the required data device->host
for (auto& _var : variables) {
const CAgentDescription agent_desc = agent.getAgentDescription();
const size_t var_size = agent_desc.getVariableSize(_var.first);
const unsigned int var_elements = agent_desc.getVariableLength(_var.first);
// get pointer to vector data
// Use the const method, but const cast away the const to avoid the reserved var check
void* v_data = const_cast<void*>(static_cast<const AgentVector&>(population).data(_var.first));
// copy the host data to the GPU
gpuErrchk(cudaMemcpy(v_data, _var.second->data, var_elements * var_size * data_count, cudaMemcpyDeviceToHost));
}
}
population._size = data_count; // Private AgentVector::resize() does not update size
}
void CUDAAgentStateList::scatterHostCreation(unsigned int newSize, char* const d_inBuff, const VarOffsetStruct & offsets, detail::CUDAScatter & scatter, const unsigned int streamId, const cudaStream_t stream) {
// Resize agent list if required
parent_list->resize(parent_list->getSizeWithDisabled() + newSize, true, stream);
// Build scatter data
std::vector<CUDAScatter::ScatterData> sd;
for (const auto &v : variables) {
// In this case, in is the location of first variable, but we step by inOffsetData.totalSize
char *in_p = reinterpret_cast<char*>(d_inBuff) + offsets.vars.at(v.first).offset;
char *out_p = reinterpret_cast<char*>(v.second->data);
sd.push_back({ v.second->type_size * v.second->elements, in_p, out_p });
}
// Scatter to device
scatter.scatterNewAgents(streamId,
stream,
sd,
offsets.totalSize,
newSize,
parent_list->getSize());
// Initialise any buffers in the fat_agent which aren't part of the current agent description
// TODO: This does redundant inits, it only needs to initialise parent/master agent variables which are not mapped
// Sub variables will already be init everytime the submodel is executed.
std::set<std::shared_ptr<VariableBuffer>> exclusionSet;
for (auto &a : variables)
exclusionSet.insert(a.second);
parent_list->initVariables(exclusionSet, newSize, parent_list->getSize(), scatter, streamId, stream);
// Update number of alive agents
parent_list->setAgentCount(parent_list->getSize() + newSize);
}
void CUDAAgentStateList::scatterSort_async(detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream) {
parent_list->scatterSort_async(scatter, streamId, stream);
}
unsigned int CUDAAgentStateList::scatterNew(void * d_newBuff, const unsigned int newSize, detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
if (newSize) {
CUDAScanCompactionConfig &scanCfg = scatter.Scan().Config(CUDAScanCompaction::Type::AGENT_OUTPUT, streamId);
// Check if we need to resize cub storage
auto& cub_temp = scatter.CubTemp(streamId);
size_t tempByte = 0;
gpuErrchk(cub::DeviceScan::ExclusiveSum(
nullptr,
tempByte,
scanCfg.d_ptrs.scan_flag,
scanCfg.d_ptrs.position,
newSize + 1,
stream));
cub_temp.resize(tempByte);
// Perform scan
gpuErrchk(cub::DeviceScan::ExclusiveSum(
cub_temp.getPtr(),
cub_temp.getSize(),
scanCfg.d_ptrs.scan_flag,
scanCfg.d_ptrs.position,
newSize + 1,
stream));
gpuErrchk(cudaStreamSynchronize(stream));
// Resize if necessary
// @todo? this could be improved by checking scan result for the actual size, rather than max size)
resize(parent_list->getSizeWithDisabled() + newSize, true, stream);
// Build scatter data
char * d_var = static_cast<char*>(d_newBuff);
std::vector<CUDAScatter::ScatterData> scatterdata;
for (const auto &v : variables) {
char *in_p = reinterpret_cast<char*>(d_var);
char *out_p = reinterpret_cast<char*>(v.second->data_condition);
scatterdata.push_back({ v.second->type_size * v.second->elements, in_p, out_p });
// Prep pointer for next var
d_var += v.second->type_size * v.second->elements * newSize;
// 64 bit align the new buffer start
if (reinterpret_cast<size_t>(d_var)%8) {
d_var += 8 - (reinterpret_cast<size_t>(d_var)%8);
}
}
// Perform scatter
const unsigned int new_births = scatter.scatter(
streamId,
stream,
CUDAScatter::Type::AGENT_OUTPUT,
scatterdata,
newSize, parent_list->getSizeWithDisabled());
if (new_births == 0) return 0;
// Initialise any buffers in the fat_agent which aren't part of the current agent description
// TODO: This does redundant inits, it only needs to initialise parent/master agent variables which are not mapped
// Sub variables will already be init everytime the submodel is executed.
std::set<std::shared_ptr<VariableBuffer>> exclusionSet;
for (auto &a : variables)
exclusionSet.insert(a.second);
parent_list->initVariables(exclusionSet, newSize, parent_list->getSize(), scatter, streamId, stream);
// Update number of alive agents
parent_list->setAgentCount(parent_list->getSize() + new_births);
return new_births;
}
return 0;
}
bool CUDAAgentStateList::getIsSubStatelist() {
return isSubStateList;
}
void CUDAAgentStateList::initUnmappedVars(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
assert(parent_list->getSizeWithDisabled() == parent_list->getSize());
if (parent_list->getSize()) {
assert(isSubStateList);
// If unmappedBuffers is not empty, perform broadcast init
if (unmappedBuffers.size()) {
scatter.broadcastInit(streamId, stream, unmappedBuffers, parent_list->getSize(), 0);
}
}
}
void CUDAAgentStateList::initExcludedVars(const unsigned int count, const unsigned int offset, CUDAScatter& scatter, const unsigned int streamId, const cudaStream_t stream) {
std::set<std::shared_ptr<VariableBuffer>> exclusionSet;
for (auto& a : variables)
exclusionSet.insert(a.second);
parent_list->initVariables(exclusionSet, count, offset, scatter, streamId, stream);
}
void CUDAAgentStateList::clear() {
parent_list->setAgentCount(0, true);
}
void CUDAAgentStateList::setAgentCount(const unsigned int newSize) {
parent_list->setAgentCount(newSize, false);
}
std::list<std::shared_ptr<VariableBuffer>> CUDAAgentStateList::getUnboundVariableBuffers() {
std::set<std::shared_ptr<VariableBuffer>> exclusionSet;
for (auto& a : variables)
exclusionSet.insert(a.second);
return parent_list->getBuffers(exclusionSet);
}
} // namespace detail
} // namespace flamegpu