.. _program_listing_file_include_flamegpu_runtime_agent_HostAgentAPI.cuh: Program Listing for File HostAgentAPI.cuh ========================================= |exhale_lsh| :ref:`Return to documentation for file ` (``include/flamegpu/runtime/agent/HostAgentAPI.cuh``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #ifndef INCLUDE_FLAMEGPU_RUNTIME_AGENT_HOSTAGENTAPI_CUH_ #define INCLUDE_FLAMEGPU_RUNTIME_AGENT_HOSTAGENTAPI_CUH_ #ifdef _MSC_VER #pragma warning(push, 1) #pragma warning(disable : 4706 4834) #endif // _MSC_VER #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ #pragma nv_diag_suppress 1719 #else #pragma diag_suppress 1719 #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ #include #include #include #include #include #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ #pragma nv_diag_default 1719 #else #pragma diag_default 1719 #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ #ifdef _MSC_VER #pragma warning(pop) #endif // _MSC_VER #include #include #include #include #include #include #include "flamegpu/simulation/detail/AgentInterface.h" #include "flamegpu/model/AgentDescription.h" #include "flamegpu/runtime/HostAPI.h" #include "flamegpu/simulation/CUDASimulation.h" #include "flamegpu/simulation/detail/CUDAAgent.h" #include "flamegpu/runtime/agent/DeviceAgentVector.h" #include "flamegpu/runtime/agent/DeviceAgentVector_impl.h" #include "flamegpu/simulation/AgentLoggingConfig_Reductions.cuh" #include "flamegpu/simulation/AgentLoggingConfig_SumReturn.h" #include "flamegpu/detail/type_decode.h" namespace flamegpu { #define FLAMEGPU_CUSTOM_REDUCTION(funcName, a, b)\ struct funcName ## _impl {\ public:\ template \ struct binary_function {\ __host__ __device__ __forceinline__ OutT operator()(const OutT &a, const OutT &b) const;\ };\ };\ funcName ## _impl funcName;\ template \ __host__ __device__ __forceinline__ OutT funcName ## _impl::binary_function::operator()(const OutT & a, const OutT & b) const #define FLAMEGPU_CUSTOM_TRANSFORM(funcName, a)\ struct funcName ## _impl {\ public:\ template\ struct unary_function {\ __host__ __device__ OutT operator()(const InT &a) const;\ };\ };\ funcName ## _impl funcName;\ template\ __device__ __forceinline__ OutT funcName ## _impl::unary_function::operator()(const InT &a) const class HostAgentAPI { friend class CUDASimulation; public: HostAgentAPI(HostAPI &_api, detail::AgentInterface &_agent, const std::string &_stateName, const VarOffsetStruct &_agentOffsets, HostAPI::AgentDataBuffer&_newAgentData) : api(_api) , agent(_agent) , stateName(_stateName) , agentOffsets(_agentOffsets) , newAgentData(_newAgentData) { } HostAgentAPI(const HostAgentAPI& other) : api(other.api) , agent(other.agent) , stateName(other.stateName) , agentOffsets(other.agentOffsets) , newAgentData(other.newAgentData) { } HostNewAgentAPI newAgent(); /* * Returns the number of agents in this state */ unsigned int count(); template InT sum(const std::string &variable) const; template OutT sum(const std::string &variable) const; template std::pair meanStandardDeviation(const std::string& variable) const; template InT min(const std::string &variable) const; template InT max(const std::string &variable) const; template unsigned int count(const std::string &variable, InT value) const; template std::vector histogramEven(const std::string &variable, unsigned int histogramBins, InT lowerBound, InT upperBound) const; template std::vector histogramEven(const std::string &variable, unsigned int histogramBins, InT lowerBound, InT upperBound) const; template InT reduce(const std::string &variable, reductionOperatorT reductionOperator, InT init) const; template OutT transformReduce(const std::string &variable, transformOperatorT transformOperator, reductionOperatorT reductionOperator, OutT init) const; enum Order {Asc, Desc}; template void sort(const std::string &variable, Order order, int beginBit = 0, int endBit = sizeof(VarT)*8); template void sort(const std::string &variable1, Order order1, const std::string &variable2, Order order2); DeviceAgentVector getPopulationData(); private: static void fillTIDArray_async(unsigned int *d_buffer, unsigned int length, cudaStream_t stream); static void sortBuffer_async(void *dest, void*src, unsigned int *position, size_t typeLen, unsigned int length, cudaStream_t stream); template void sum_async(const std::string& variable, OutT& result, cudaStream_t stream, unsigned int streamId) const; template void meanStandardDeviation_async(const std::string& variable, std::pair& result, cudaStream_t stream, unsigned int streamId) const; template void min_async(const std::string& variable, InT& result, cudaStream_t stream, unsigned int streamId) const; template void max_async(const std::string& variable, InT& result, cudaStream_t stream, unsigned int streamId) const; template unsigned int count_async(const std::string& variable, InT value, cudaStream_t stream) const; template void histogramEven_async(const std::string& variable, unsigned int histogramBins, InT lowerBound, InT upperBound, std::vector &result, cudaStream_t stream, unsigned int streamId) const; template void reduce_async(const std::string& variable, reductionOperatorT reductionOperator, InT init, InT &result, cudaStream_t stream, unsigned int streamId) const; template OutT transformReduce_async(const std::string& variable, transformOperatorT transformOperator, reductionOperatorT reductionOperator, OutT init, cudaStream_t stream) const; template void sort_async(const std::string& variable, Order order, int beginBit, int endBit, cudaStream_t stream, unsigned int streamId); template void sort_async(const std::string& variable1, Order order1, const std::string& variable2, Order order2, cudaStream_t stream, unsigned int streamId); HostAPI &api; detail::AgentInterface &agent; const std::string stateName; const VarOffsetStruct& agentOffsets; HostAPI::AgentDataBuffer& newAgentData; }; // // Implementation // template InT HostAgentAPI::sum(const std::string &variable) const { InT rtn; sum_async(variable, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template OutT HostAgentAPI::sum(const std::string& variable) const { OutT rtn; sum_async(variable, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template void HostAgentAPI::sum_async(const std::string &variable, OutT &result, const cudaStream_t stream, const unsigned int streamId) const { static_assert(sizeof(InT) <= sizeof(OutT), "Template arg OutT should not be of a smaller size than InT"); std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::sum() does not support agent array variables."); } if (std::type_index(typeid(InT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::sum(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(InT).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); // Check if we need to resize cub storage auto &cub_temp = api.scatter.CubTemp(streamId); size_t tempByte = 0; gpuErrchk(cub::DeviceReduce::Sum(nullptr, tempByte, reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), stream)); cub_temp.resize(tempByte); // Resize output storage api.resizeOutputSpace(); gpuErrchk(cub::DeviceReduce::Sum(cub_temp.getPtr(), cub_temp.getSize(), reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), stream)); gpuErrchkLaunch(); gpuErrchk(cudaMemcpyAsync(&result, api.d_output_space, sizeof(OutT), cudaMemcpyDeviceToHost, stream)); } template std::pair HostAgentAPI::meanStandardDeviation(const std::string& variable) const { std::pair rtn; meanStandardDeviation_async(variable, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); // Redundant, meanStandardDeviation_async() is not truly async return rtn; } template void HostAgentAPI::meanStandardDeviation_async(const std::string& variable, std::pair &result, const cudaStream_t stream, const unsigned int streamId) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::meanStandardDeviation() does not support agent array variables."); } if (std::type_index(typeid(InT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::meanStandardDeviation(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(InT).name()); } const auto agentCount = agent.getStateSize(stateName); if (agentCount == 0) { result = std::make_pair(0.0, 0.0); } // Calculate mean (We could make this more efficient by leaving sum in device mem?) typename sum_input_t::result_t sum_result; sum_async::result_t>(variable, sum_result, stream, streamId); gpuErrchk(cudaStreamSynchronize(stream)); const double mean = sum_result / static_cast(agentCount); // Then for each number: subtract the Mean and square the result // Then work out the mean of those squared differences. auto lock = std::unique_lock(detail::STANDARD_DEVIATION_MEAN_mutex); gpuErrchk(cudaMemcpyToSymbolAsync(detail::STANDARD_DEVIATION_MEAN, &mean, sizeof(double), 0, cudaMemcpyHostToDevice, stream)); const double variance = transformReduce_async(variable, detail::standard_deviation_subtract_mean, detail::standard_deviation_add, 0, stream) / static_cast(agentCount); lock.unlock(); // Take the square root of that and we are done! result = std::make_pair(mean, sqrt(variance)); } template InT HostAgentAPI::min(const std::string& variable) const { InT rtn; min_async(variable, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template void HostAgentAPI::min_async(const std::string &variable, InT& result, const cudaStream_t stream, const unsigned int streamId) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::lowerBound() does not support agent array variables."); } if (std::type_index(typeid(InT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::min(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(InT).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); // Check if we need to resize cub storage auto& cub_temp = api.scatter.CubTemp(streamId); // Resize cub storage size_t tempByte = 0; gpuErrchk(cub::DeviceReduce::Min(nullptr, tempByte, reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), stream)); gpuErrchkLaunch(); cub_temp.resize(tempByte); // Resize output storage api.resizeOutputSpace(); gpuErrchk(cub::DeviceReduce::Min(cub_temp.getPtr(), cub_temp.getSize(), reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), stream)); gpuErrchkLaunch(); gpuErrchk(cudaMemcpyAsync(&result, api.d_output_space, sizeof(InT), cudaMemcpyDeviceToHost, stream)); } template InT HostAgentAPI::max(const std::string& variable) const { InT rtn; max_async(variable, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template void HostAgentAPI::max_async(const std::string &variable, InT &result, const cudaStream_t stream, const unsigned int streamId) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::max() does not support agent array variables."); } if (std::type_index(typeid(InT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::max(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(InT).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); // Check if we need to resize cub storage auto& cub_temp = api.scatter.CubTemp(streamId); // Resize cub storage size_t tempByte = 0; gpuErrchk(cub::DeviceReduce::Max(nullptr, tempByte, reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), stream)); cub_temp.resize(tempByte); // Resize output storage api.resizeOutputSpace(); gpuErrchk(cub::DeviceReduce::Max(cub_temp.getPtr(), cub_temp.getSize(), reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), stream)); gpuErrchkLaunch(); gpuErrchk(cudaMemcpyAsync(&result, api.d_output_space, sizeof(InT), cudaMemcpyDeviceToHost, stream)); } template unsigned int HostAgentAPI::count(const std::string &variable, InT value) const { return count_async(variable, value, this->api.stream); } template unsigned int HostAgentAPI::count_async(const std::string& variable, InT value, const cudaStream_t stream) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::count() does not support agent array variables."); } if (std::type_index(typeid(InT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::count(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(InT).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); // Cast return from ptrdiff_t (int64_t) to (uint32_t) unsigned int rtn = static_cast(thrust::count(thrust::cuda::par.on(stream), thrust::device_ptr(reinterpret_cast(var_ptr)), thrust::device_ptr(reinterpret_cast(var_ptr) + agentCount), value)); gpuErrchkLaunch(); return rtn; } template std::vector HostAgentAPI::histogramEven(const std::string &variable, unsigned int histogramBins, InT lowerBound, InT upperBound) const { std::vector rtn; histogramEven_async(variable, histogramBins, lowerBound, upperBound, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template std::vector HostAgentAPI::histogramEven(const std::string &variable, unsigned int histogramBins, InT lowerBound, InT upperBound) const { std::vector rtn; histogramEven_async(variable, histogramBins, lowerBound, upperBound, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template void HostAgentAPI::histogramEven_async(const std::string &variable, unsigned int histogramBins, InT lowerBound, InT upperBound, std::vector& result, const cudaStream_t stream, const unsigned int streamId) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } if (lowerBound >= upperBound) { THROW exception::InvalidArgument("lowerBound (%s) must be lower than < upperBound (%s) in HostAgentAPI::histogramEven().", std::to_string(lowerBound).c_str(), std::to_string(upperBound).c_str()); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::histogramEven() does not support agent array variables."); } if (std::type_index(typeid(InT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::histogramEven(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(InT).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); // Check if we need to resize cub storage auto& cub_temp = api.scatter.CubTemp(streamId); // Resize cub storage size_t tempByte = 0; gpuErrchk(cub::DeviceHistogram::HistogramEven(nullptr, tempByte, reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), histogramBins + 1, lowerBound, upperBound, static_cast(agentCount), stream)); gpuErrchkLaunch(); cub_temp.resize(tempByte); // Resize output storage api.resizeOutputSpace(histogramBins); gpuErrchk(cub::DeviceHistogram::HistogramEven(cub_temp.getPtr(), cub_temp.getSize(), reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), histogramBins + 1, lowerBound, upperBound, static_cast(agentCount), stream)); gpuErrchkLaunch(); result.resize(histogramBins); gpuErrchk(cudaMemcpyAsync(result.data(), api.d_output_space, histogramBins * sizeof(OutT), cudaMemcpyDeviceToHost, stream)); } template InT HostAgentAPI::reduce(const std::string &variable, reductionOperatorT reductionOperator, InT init) const { InT rtn; reduce_async(variable, reductionOperator, init, rtn, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); return rtn; } template void HostAgentAPI::reduce_async(const std::string & variable, reductionOperatorT /*reductionOperator*/, InT init, InT &result, const cudaStream_t stream, const unsigned int streamId) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != detail::type_decode::len_t) { THROW exception::UnsupportedVarType("HostAgentAPI::reduce() does not support agent array variables."); } if (std::type_index(typeid(typename detail::type_decode::type_t)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::reduce(). " "This call expects '%s', but '%s' was requested.", typ.name(), typeid(typename detail::type_decode::type_t).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); // Check if we need to resize cub storage auto& cub_temp = api.scatter.CubTemp(streamId); // Resize cub storage size_t tempByte = 0; gpuErrchk(cub::DeviceReduce::Reduce(nullptr, tempByte, reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), typename reductionOperatorT::template binary_function(), init, stream)); gpuErrchkLaunch(); cub_temp.resize(tempByte); // Resize output storage api.resizeOutputSpace(); gpuErrchk(cub::DeviceReduce::Reduce(cub_temp.getPtr(), cub_temp.getSize(), reinterpret_cast(var_ptr), reinterpret_cast(api.d_output_space), static_cast(agentCount), typename reductionOperatorT::template binary_function(), init, stream)); gpuErrchkLaunch(); gpuErrchk(cudaMemcpyAsync(&result, api.d_output_space, sizeof(InT), cudaMemcpyDeviceToHost, stream)); } template OutT HostAgentAPI::transformReduce(const std::string &variable, transformOperatorT transformOperator, reductionOperatorT reductionOperator, OutT init) const { return transformReduce_async(variable, transformOperator, reductionOperator, init, this->api.stream); } template OutT HostAgentAPI::transformReduce_async(const std::string &variable, transformOperatorT /*transformOperator*/, reductionOperatorT /*reductionOperator*/, OutT init, cudaStream_t stream) const { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } const CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != detail::type_decode::len_t) { THROW exception::UnsupportedVarType("HostAgentAPI::transformReduce() does not support agent array variables."); } if (std::type_index(typeid(typename detail::type_decode::type_t)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::transformReduce(). " "This call expects '%s', but '%s' was requested.", typ.name(), typeid(typename detail::type_decode::type_t).name()); } void *var_ptr = agent.getStateVariablePtr(stateName, variable); const auto agentCount = agent.getStateSize(stateName); OutT rtn = thrust::transform_reduce(thrust::cuda::par.on(stream), thrust::device_ptr(reinterpret_cast(var_ptr)), thrust::device_ptr(reinterpret_cast(var_ptr) + agentCount), typename transformOperatorT::template unary_function(), init, typename reductionOperatorT::template binary_function()); gpuErrchkLaunch(); return rtn; } template void HostAgentAPI::sort(const std::string &variable, Order order, int beginBit, int endBit) { sort_async(variable, order, beginBit, endBit, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); } template void HostAgentAPI::sort_async(const std::string & variable, Order order, int beginBit, int endBit, const cudaStream_t stream, const unsigned int streamId) { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream || this->api.streamId != streamId) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } auto &scatter = api.agentModel.singletons->scatter; auto &scan = scatter.Scan(); // Check variable is valid CAgentDescription agentDesc(agent.getAgentDescription()); std::type_index typ = agentDesc.getVariableType(variable); // This will throw name exception if (agentDesc.getVariableLength(variable) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::sort() does not support agent array variables."); } if (std::type_index(typeid(VarT)) != typ) { THROW exception::InvalidVarType("Wrong variable type passed to HostAgentAPI::sort(). " "This call expects '%s', but '%s' was requested.", agentDesc.getVariableType(variable).name(), typeid(VarT).name()); } // We will use scan_flag agent_death/message_output here so resize const unsigned int agentCount = agent.getStateSize(stateName); void *var_ptr = agent.getStateVariablePtr(stateName, variable); const size_t total_variable_buffer_size = sizeof(VarT) * agentCount; const unsigned int fake_num_agent = static_cast(total_variable_buffer_size/sizeof(unsigned int)) +1; scan.resize(fake_num_agent, detail::CUDAScanCompaction::AGENT_DEATH, streamId); scan.resize(agentCount, detail::CUDAScanCompaction::MESSAGE_OUTPUT, streamId); VarT *keys_in = reinterpret_cast(scan.Config(detail::CUDAScanCompaction::Type::AGENT_DEATH, streamId).d_ptrs.scan_flag); VarT *keys_out = reinterpret_cast(scan.Config(detail::CUDAScanCompaction::Type::AGENT_DEATH, streamId).d_ptrs.position); unsigned int *vals_in = scan.Config(detail::CUDAScanCompaction::Type::MESSAGE_OUTPUT, streamId).d_ptrs.scan_flag; unsigned int *vals_out = scan.Config(detail::CUDAScanCompaction::Type::MESSAGE_OUTPUT, streamId).d_ptrs.position; // Create array of TID (use scanflag_death.position) fillTIDArray_async(vals_in, agentCount, stream); // Create array of agent values (use scanflag_death.scan_flag) gpuErrchk(cudaMemcpyAsync(keys_in, var_ptr, total_variable_buffer_size, cudaMemcpyDeviceToDevice, stream)); // Check if we need to resize cub storage auto& cub_temp = api.scatter.CubTemp(streamId); // Resize cub storage size_t tempByte = 0; if (order == Asc) { gpuErrchk(cub::DeviceRadixSort::SortPairs(nullptr, tempByte, keys_in, keys_out, vals_in, vals_out, agentCount, beginBit, endBit, stream)); } else { gpuErrchk(cub::DeviceRadixSort::SortPairsDescending(nullptr, tempByte, keys_in, keys_out, vals_in, vals_out, agentCount, beginBit, endBit, stream)); } cub_temp.resize(tempByte); // pair sort if (order == Asc) { gpuErrchk(cub::DeviceRadixSort::SortPairs(cub_temp.getPtr(), cub_temp.getSize(), keys_in, keys_out, vals_in, vals_out, agentCount, beginBit, endBit, stream)); } else { gpuErrchk(cub::DeviceRadixSort::SortPairsDescending(cub_temp.getPtr(), cub_temp.getSize(), keys_in, keys_out, vals_in, vals_out, agentCount, beginBit, endBit, stream)); } // Scatter all agent variables api.agentModel.agent_map.at(agentDesc.getName())->scatterSort_async(stateName, scatter, streamId, stream); if (population) { // If the user has a DeviceAgentVector out, purge cache so it redownloads new data on next use population->purgeCache(); } } template void HostAgentAPI::sort(const std::string &variable1, Order order1, const std::string &variable2, Order order2) { sort_async(variable1, order1, variable2, order2, this->api.stream, this->api.streamId); gpuErrchk(cudaStreamSynchronize(this->api.stream)); } template void HostAgentAPI::sort_async(const std::string & variable1, Order order1, const std::string & variable2, Order order2, const cudaStream_t stream, const unsigned int streamId) { std::shared_ptr population = agent.getPopulationVec(stateName); if (population) { if (this->api.stream != stream || this->api.streamId != streamId) { THROW exception::InvalidOperation("Attempting to sync DeviceAgentVector with wrong stream!\nThis should not be possible.\n"); } // If the user has a DeviceAgentVector out, sync changes population->syncChanges(); } auto &scatter = api.agentModel.singletons->scatter; auto &scan = scatter.Scan(); const CAgentDescription agentDesc(agent.getAgentDescription()); { // Check variable 1 is valid std::type_index typ = agentDesc.getVariableType(variable1); // This will throw name exception if (agentDesc.getVariableLength(variable1) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::sort() does not support agent array variables."); } if (std::type_index(typeid(Var1T)) != typ) { THROW exception::InvalidVarType("Wrong type for variable '%s' passed to HostAgentAPI::sort(). " "This call expects '%s', but '%s' was requested.", variable1.c_str(), agentDesc.getVariableType(variable1).name(), typeid(Var1T).name()); } } { // Check variable 2 is valid std::type_index typ = agentDesc.getVariableType(variable2); // This will throw name exception if (agentDesc.getVariableLength(variable2) != 1) { THROW exception::UnsupportedVarType("HostAgentAPI::sort() does not support agent array variables."); } if (std::type_index(typeid(Var2T)) != typ) { THROW exception::InvalidVarType("Wrong type for variable '%s' passed to HostAgentAPI::sort(). " "This call expects '%s', but '%s' was requested.", variable2.c_str(), agentDesc.getVariableType(variable2).name(), typeid(Var2T).name()); } } const unsigned int agentCount = agent.getStateSize(stateName); // Fill array with var1 keys { // Resize const size_t total_variable_buffer_size = sizeof(Var1T) * agentCount; const unsigned int fake_num_agent = static_cast(total_variable_buffer_size/sizeof(unsigned int)) +1; scan.resize(fake_num_agent, detail::CUDAScanCompaction::AGENT_DEATH, streamId); // Fill void *keys1b = scan.Config(detail::CUDAScanCompaction::Type::AGENT_DEATH, streamId).d_ptrs.position; void *var_ptr = agent.getStateVariablePtr(stateName, variable1); gpuErrchk(cudaMemcpyAsync(keys1b, var_ptr, total_variable_buffer_size, cudaMemcpyDeviceToDevice, stream)); } // Fill array with var2 keys { // Resize const size_t total_variable_buffer_size = sizeof(Var2T) * agentCount; const unsigned int fake_num_agent = static_cast(total_variable_buffer_size/sizeof(unsigned int)) +1; scan.resize(std::max(agentCount, fake_num_agent), detail::CUDAScanCompaction::MESSAGE_OUTPUT, streamId); // Fill void *keys2 = scan.Config(detail::CUDAScanCompaction::Type::MESSAGE_OUTPUT, streamId).d_ptrs.scan_flag; void *var_ptr = agent.getStateVariablePtr(stateName, variable2); gpuErrchk(cudaMemcpyAsync(keys2, var_ptr, total_variable_buffer_size, cudaMemcpyDeviceToDevice, stream)); } // Define our buffers (here, after resize) Var1T *keys1 = reinterpret_cast(scan.Config(detail::CUDAScanCompaction::Type::AGENT_DEATH, streamId).d_ptrs.scan_flag); Var1T *keys1b = reinterpret_cast(scan.Config(detail::CUDAScanCompaction::Type::AGENT_DEATH, streamId).d_ptrs.position); Var2T *keys2 = reinterpret_cast(scan.Config(detail::CUDAScanCompaction::Type::MESSAGE_OUTPUT, streamId).d_ptrs.scan_flag); unsigned int *vals = scan.Config(detail::CUDAScanCompaction::Type::MESSAGE_OUTPUT, streamId).d_ptrs.position; // Init value array fillTIDArray_async(vals, agentCount, stream); // Process variable 2 first { // pair sort values if (order2 == Asc) { thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr(keys2), thrust::device_ptr(keys2 + agentCount), thrust::device_ptr(vals), thrust::less()); } else { thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr(keys2), thrust::device_ptr(keys2 + agentCount), thrust::device_ptr(vals), thrust::greater()); } gpuErrchkLaunch(); // sort keys1 based on this order sortBuffer_async(keys1, keys1b, vals, sizeof(Var1T), agentCount, stream); } // Process variable 1 second { // pair sort if (order1 == Asc) { thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr(keys1), thrust::device_ptr(keys1 + agentCount), thrust::device_ptr(vals), thrust::less()); } else { thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr(keys1), thrust::device_ptr(keys1 + agentCount), thrust::device_ptr(vals), thrust::greater()); } gpuErrchkLaunch(); } // Scatter all agent variables api.agentModel.agent_map.at(agentDesc.getName())->scatterSort_async(stateName, scatter, streamId, stream); if (population) { // If the user has a DeviceAgentVector out, purge cache so it redownloads new data on next use population->purgeCache(); } } } // namespace flamegpu #endif // INCLUDE_FLAMEGPU_RUNTIME_AGENT_HOSTAGENTAPI_CUH_