Program Listing for File CUDAFatAgentStateList.cu
↰ Return to documentation for file (src/flamegpu/simulation/detail/CUDAFatAgentStateList.cu
)
#include "flamegpu/simulation/detail/CUDAFatAgentStateList.h"
#include "flamegpu/simulation/detail/CUDAScatter.cuh"
#include "flamegpu/detail/cuda.cuh"
namespace flamegpu {
namespace detail {
CUDAFatAgentStateList::CUDAFatAgentStateList(const AgentData& description)
: aliveAgents(0)
, disabledAgents(0)
, bufferLen(0) {
// Initial statelist, must be from agent index 0
// State lists begin unallocated, allocated on first use
for (const auto &v : description.variables) {
AgentVariable variable = {0u, v.first};
variables.emplace(variable, std::make_shared<VariableBuffer>(v.second.type, v.second.type_size, v.second.default_value, v.second.elements));
}
// All initial variables are unique
for (const auto &s : variables)
variables_unique.push_back(s.second);
}
CUDAFatAgentStateList::CUDAFatAgentStateList(const CUDAFatAgentStateList& other)
: aliveAgents(other.aliveAgents)
, disabledAgents(other.disabledAgents)
, bufferLen(0) {
assert(other.bufferLen == 0);
std::unordered_map<void*, std::shared_ptr<VariableBuffer>> var_map;
// Copy all unique variables, create a temporary map of old unique var to new unique var
for (const auto &v : other.variables_unique) {
assert(v->data == nullptr);
// Ensure that copy constructor is used
auto t_var = std::make_shared<VariableBuffer>(*v.get());
variables_unique.push_back(t_var);
var_map.emplace(v.get(), t_var);
}
// Using var map, solve variable pairings
for (const auto &v : other.variables) {
variables.emplace(v.first, var_map.at(v.second.get()));
}
}
CUDAFatAgentStateList::~CUDAFatAgentStateList() {
for (const auto &buff : variables_unique) {
gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data));
gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data_swap));
}
}
void CUDAFatAgentStateList::addSubAgentVariables(
const AgentData &description,
const unsigned int master_fat_index,
const unsigned int sub_fat_index,
const std::shared_ptr<SubAgentData> &mapping) {
for (const auto &v : description.variables) {
const auto &mapped = mapping->variables.find(v.first);
AgentVariable sub_var = {sub_fat_index, v.first};
if (mapped != mapping->variables.end()) {
// Variable is mapped, so use existing variable
AgentVariable master_var = {master_fat_index, mapped->second};
variables.emplace(sub_var, variables.at(master_var));
} else {
// Variable is not mapped, so create new variable
auto t_buff = std::make_shared<VariableBuffer>(v.second.type, v.second.type_size, v.second.default_value, v.second.elements);
variables.emplace(sub_var, t_buff);
variables_unique.push_back(t_buff);
}
}
}
std::shared_ptr<VariableBuffer> CUDAFatAgentStateList::getVariableBuffer(const unsigned int fat_index, const std::string &name) {
const AgentVariable variable = {fat_index, name};
return variables.at(variable);
}
void CUDAFatAgentStateList::resize(const unsigned int minSize, const bool retainData, const cudaStream_t stream) {
// If already big enough return
if (minSize <= bufferLen)
return;
// else, decide new size
unsigned int newSize = bufferLen > 1024 ? bufferLen : 1024;
while (newSize < minSize)
newSize = static_cast<unsigned int>(newSize * 1.25f);
// Resize all buffers in fat state list
for (auto &buff : variables_unique) {
const size_t var_size = buff->type_size * buff->elements;
const size_t buff_size = var_size * newSize;
// Free old swap buffer
gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data_swap));
// Allocate new buffer to swap
gpuErrchk(cudaMalloc(&buff->data_swap, buff_size));
// Copy old data to new buffer in swap
if (retainData && buff->data) {
const size_t active_len = aliveAgents * var_size;
// const size_t inactive_len = (newSize - aliveAgents) * var_size;
// Copy across old data (TODO: We could improve this by doing a scatter for all variables at once)
gpuErrchk(cudaMemcpyAsync(buff->data_swap, buff->data, active_len, cudaMemcpyDeviceToDevice, stream));
// Zero remaining new data (This will be overwritten before use, so redundant)
// gpuErrchk(cudaMemsetAsync(reinterpret_cast<char*>(buff->data_swap) + active_len, 0, inactive_len, stream));
} else {
// Zero remaining new data (This will be overwritten before use, so redundant)
// gpuErrchk(cudaMemsetAsync(buff->data_swap, 0, buff_size, stream));
}
}
if (retainData) {
// Ensure copies have finished, before we free the buffers!
gpuErrchk(cudaStreamSynchronize(stream));
}
for (auto& buff : variables_unique) {
const size_t var_size = buff->type_size * buff->elements;
const size_t buff_size = var_size * newSize;
// Swap buffers
std::swap(buff->data_swap, buff->data);
// Free old swap buffer
gpuErrchk(flamegpu::detail::cuda::cudaFree(buff->data_swap));
// Allocate new buffer to swap
gpuErrchk(cudaMalloc(&buff->data_swap, buff_size));
// Update condition list
assert(disabledAgents == 0);
buff->data_condition = buff->data;
}
// Update buffer len
bufferLen = newSize;
// Clear count
if (!retainData) {
aliveAgents = 0;
disabledAgents = 0;
}
}
unsigned int CUDAFatAgentStateList::getSize() const {
return aliveAgents - disabledAgents;
}
unsigned int CUDAFatAgentStateList::getSizeWithDisabled() const {
return aliveAgents;
}
unsigned int CUDAFatAgentStateList::getAllocatedSize() const {
return bufferLen;
}
void CUDAFatAgentStateList::setAgentCount(const unsigned int newCount, const bool resetDisabled) {
if ((resetDisabled && newCount > bufferLen) || (!resetDisabled && (newCount + disabledAgents> bufferLen))) {
THROW exception::InvalidMemoryCapacity("Agent count will exceed allocated buffer size, "
"in CUDAFatAgentStateList::setAgentCount()\n");
}
if (resetDisabled) {
disabledAgents = 0;
}
aliveAgents = disabledAgents + newCount;
}
unsigned int CUDAFatAgentStateList::scatterDeath(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
// Build scatter data
std::vector<CUDAScatter::ScatterData> sd;
for (const auto &v : variables_unique) {
char *in_p = reinterpret_cast<char*>(v->data);
char *out_p = reinterpret_cast<char*>(v->data_swap);
sd.push_back({ v->type_size * v->elements, in_p, out_p });
// Pre swap stored pointers
std::swap(v->data, v->data_swap);
// Pre update data_condition
v->data_condition = out_p + (disabledAgents * v->type_size * v->elements);
}
// Perform scatter
const unsigned int living_agents = scatter.scatter(
streamId,
stream,
CUDAScatter::Type::AGENT_DEATH, sd,
aliveAgents, 0, false, disabledAgents);
// Update size
assert(living_agents <= bufferLen);
aliveAgents = living_agents;
return living_agents;
}
unsigned int CUDAFatAgentStateList::scatterAgentFunctionConditionFalse(detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
// This makes no sense if we have disabled agents (it's supposed to reorder to create disabled agents)
assert(disabledAgents == 0);
// Build scatter data
std::vector<CUDAScatter::ScatterData> sd;
for (const auto &v : variables_unique) {
char *in_p = reinterpret_cast<char*>(v->data);
char *out_p = reinterpret_cast<char*>(v->data_swap);
sd.push_back({ v->type_size * v->elements, in_p, out_p });
}
// Perform scatter
const unsigned int scattered_agents = scatter.scatter(streamId,
stream,
CUDAScatter::Type::AGENT_DEATH, sd,
aliveAgents, 0, false, disabledAgents);
return scattered_agents;
}
unsigned int CUDAFatAgentStateList::scatterAgentFunctionConditionTrue(const unsigned int conditionFailCount, detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
// This makes no sense if we have disabled agents (it's suppose to reorder to create disabled agents)
assert(disabledAgents == 0);
// Build scatter data
std::vector<CUDAScatter::ScatterData> sd;
for (const auto &v : variables_unique) {
char *in_p = reinterpret_cast<char*>(v->data);
char *out_p = reinterpret_cast<char*>(v->data_swap);
sd.push_back({ v->type_size * v->elements, in_p, out_p });
// Pre swap stored pointers
std::swap(v->data, v->data_swap);
// Pre update data_condition
v->data_condition = out_p + (conditionFailCount * v->type_size * v->elements);
}
// Perform scatter
const unsigned int scattered_agents = scatter.scatter(streamId,
stream,
CUDAScatter::Type::AGENT_DEATH, sd,
aliveAgents, conditionFailCount, true, disabledAgents);
// Update disabled agents count
disabledAgents = conditionFailCount;
return scattered_agents;
}
void CUDAFatAgentStateList::setDisabledAgents(const unsigned int numberOfDisabled) {
assert(numberOfDisabled <= aliveAgents);
disabledAgents = numberOfDisabled;
// update data_condition for each unique variable
for (const auto &v : variables_unique) {
char *data_p = reinterpret_cast<char*>(v->data);
v->data_condition = data_p + (numberOfDisabled * v->type_size * v->elements);
}
}
void CUDAFatAgentStateList::scatterSort_async(detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream) {
// This is not designed to run when there are disabled agents
assert(disabledAgents == 0);
// Build scatter data
std::vector<CUDAScatter::ScatterData> sd;
for (const auto &v : variables_unique) {
char *in_p = reinterpret_cast<char*>(v->data);
char *out_p = reinterpret_cast<char*>(v->data_swap);
sd.push_back({ v->type_size * v->elements, in_p, out_p });
// Pre swap stored pointers
std::swap(v->data, v->data_swap);
// Pre update data_condition
v->data_condition = out_p;
}
scatter.scatterPosition_async(streamId, stream, CUDAScatter::Type::MESSAGE_OUTPUT, sd, aliveAgents);
}
void CUDAFatAgentStateList::initVariables(std::set<std::shared_ptr<VariableBuffer>> &exclusionSet, const unsigned int initCount, const unsigned initOffset, detail::CUDAScatter &scatter, const unsigned int streamId, const cudaStream_t stream) {
if (initCount && exclusionSet.size()) {
assert(initCount + initOffset <= bufferLen);
std::list<std::shared_ptr<VariableBuffer>> initVars;
// Build list of init vars (to save repeating this process), and calculate memory requirements
for (const auto &v : variables_unique) {
if (exclusionSet.find(v) == exclusionSet.end()) {
initVars.push_back(v);
}
}
// Perform scatter
scatter.broadcastInit(streamId, stream, initVars, initCount, initOffset);
}
}
std::list<std::shared_ptr<VariableBuffer>> &CUDAFatAgentStateList::getUniqueVariables() { return variables_unique; }
void CUDAFatAgentStateList::swap(CUDAFatAgentStateList*other) {
std::swap(aliveAgents, other->aliveAgents);
std::swap(disabledAgents, other->disabledAgents);
std::swap(bufferLen, other->bufferLen);
for (auto a = variables_unique.begin(), b=other->variables_unique.begin(); a != variables_unique.end() && b != other->variables_unique.end(); ++a, ++b) {
(*a)->swap(b->get());
}
}
std::list<std::shared_ptr<VariableBuffer>> CUDAFatAgentStateList::getBuffers(std::set<std::shared_ptr<VariableBuffer>>& exclusionSet) {
std::list<std::shared_ptr<VariableBuffer>> returnVars;
for (const auto& v : variables_unique) {
if (exclusionSet.find(v) == exclusionSet.end()) {
returnVars.push_back(v);
}
}
return returnVars;
}
} // namespace detail
} // namespace flamegpu