.. _program_listing_file_include_flamegpu_simulation_detail_MPIEnsemble.h:

Program Listing for File MPIEnsemble.h
======================================

|exhale_lsh| :ref:`Return to documentation for file <file_include_flamegpu_simulation_detail_MPIEnsemble.h>` (``include/flamegpu/simulation/detail/MPIEnsemble.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #ifndef INCLUDE_FLAMEGPU_SIMULATION_DETAIL_MPIENSEMBLE_H_
   #define INCLUDE_FLAMEGPU_SIMULATION_DETAIL_MPIENSEMBLE_H_

   #include <mpi.h>

   #include <map>
   #include <memory>
   #include <mutex>
   #include <set>
   #include <string>
   #include <vector>

   #include "flamegpu/simulation/CUDAEnsemble.h"
   #include "flamegpu/simulation/detail/MPISimRunner.h"

   namespace flamegpu {
   namespace detail {

   class MPIEnsemble {
       const CUDAEnsemble::EnsembleConfig &config;
       // Tags to differentiate the MPI messages used in the protocol
       enum EnvelopeTag : int {
           // Sent from worker to manager to request a job index to process
           RequestJob = 0,
           // Sent from manager to worker to assign a job index to process, in response to RequestJob
           AssignJob = 1,
           // Sent from worker to manager to report an error during job execution
           // If fail fast is enabled, the following RequestJob will receive an exit job id (>= plans.size())
           ReportError = 2,
           // Sent from worker to manager to report GPUs for telemetry
           TelemetryDevices = 3,
       };

    public:
       const int world_rank;
       const int world_size;
       const int local_rank;
       const int local_size;
       const unsigned int total_runs;
       explicit MPIEnsemble(const CUDAEnsemble::EnsembleConfig &_config, unsigned int _total_runs);
       int receiveErrors(std::multimap<int, AbstractSimRunner::ErrorDetail> &err_detail);
       int receiveJobRequests(unsigned int &next_run);
       void sendErrorDetail(AbstractSimRunner::ErrorDetail &e_detail);
       int requestJob();
       void worldBarrier();
       std::string assembleGPUsString();
       void retrieveLocalErrorDetail(std::mutex &log_export_queue_mutex,
           std::multimap<int, AbstractSimRunner::ErrorDetail> &err_detail,
           std::vector<AbstractSimRunner::ErrorDetail> &err_detail_local, int i, std::set<int> devices);
       bool createParticipatingCommunicator(bool isParticipating);
       int getRankIsParticipating() { return this->rank_is_participating; }
       int getParticipatingCommSize() { return this->participating_size; }
       int getParticipatingCommRank() { return this->participating_rank; }
       static std::set<int> devicesForThisRank(std::set<int> devicesToSelectFrom, int local_size, int local_rank);
       std::set<int> devicesForThisRank(std::set<int> devicesToSelectFrom);

    private:
       static int queryMPIWorldRank();
       static int queryMPIWorldSize();
       static int queryMPISharedGroupRank();
       static int queryMPISharedGroupSize();
       static void initMPI();
       unsigned int getDeviceIndex(const int j, std::set<int> devices);
       const MPI_Datatype MPI_ERROR_DETAIL;
       bool rank_is_participating;
       MPI_Comm comm_participating;
       int participating_size;
       int participating_rank;
   };
   }  // namespace detail
   }  // namespace flamegpu

   #endif  // INCLUDE_FLAMEGPU_SIMULATION_DETAIL_MPIENSEMBLE_H_
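
The ``EnvelopeTag`` values above describe a small manager/worker handshake: a worker sends ``RequestJob``, and the manager replies with ``AssignJob`` carrying the next run index, where an index ``>= total_runs`` tells the worker to exit. The sketch below illustrates that exchange with plain ``MPI_Send``/``MPI_Recv`` calls; it is a minimal illustration of the tag protocol only, not the FLAMEGPU implementation, and all function and variable names are invented for the example.

.. code-block:: cpp

   // Hypothetical sketch of the RequestJob/AssignJob handshake implied by
   // EnvelopeTag; not the FLAMEGPU implementation.
   #include <mpi.h>

   // Worker side: ask the manager (rank 0) for work until told to exit.
   void workerLoop(unsigned int total_runs) {
       while (true) {
           int dummy = 0;
           unsigned int run_index = 0;
           MPI_Send(&dummy, 1, MPI_INT, 0, 0 /*RequestJob*/, MPI_COMM_WORLD);
           MPI_Recv(&run_index, 1, MPI_UNSIGNED, 0, 1 /*AssignJob*/,
                    MPI_COMM_WORLD, MPI_STATUS_IGNORE);
           if (run_index >= total_runs)
               break;  // Exit job id: no work remains (or fail-fast triggered)
           // ... execute plan run_index ...
       }
   }

   // Manager side: hand out the next run index in response to each request.
   void managerLoop(unsigned int total_runs, int workers) {
       unsigned int next_run = 0;
       int finished = 0;
       while (finished < workers) {
           int dummy = 0;
           MPI_Status status;
           MPI_Recv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 0 /*RequestJob*/,
                    MPI_COMM_WORLD, &status);
           unsigned int assigned = next_run < total_runs ? next_run++ : total_runs;
           if (assigned >= total_runs)
               ++finished;  // This worker has now been told to exit
           MPI_Send(&assigned, 1, MPI_UNSIGNED, status.MPI_SOURCE,
                    1 /*AssignJob*/, MPI_COMM_WORLD);
       }
   }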
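
``local_rank`` and ``local_size`` (populated via ``queryMPISharedGroupRank``/``queryMPISharedGroupSize``) describe a rank's position among the ranks sharing a node. MPI 3's ``MPI_Comm_split_type`` with ``MPI_COMM_TYPE_SHARED`` is the standard way to obtain such values; the sketch below assumes that approach and is not taken from the library's source.

.. code-block:: cpp

   // Assumed sketch of obtaining the per-node (shared memory) rank and size,
   // as the local_rank/local_size members in the header suggest.
   #include <mpi.h>

   void querySharedGroupSketch(int *local_rank, int *local_size) {
       MPI_Comm node_comm;
       // Group together all ranks that can share memory, i.e. share a node.
       MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                           MPI_INFO_NULL, &node_comm);
       MPI_Comm_rank(node_comm, local_rank);
       MPI_Comm_size(node_comm, local_size);
       MPI_Comm_free(&node_comm);
   }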
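
``devicesForThisRank`` partitions the visible CUDA devices among the ranks on a node so they do not contend for the same GPU. The header does not show the partitioning scheme; the sketch below assumes a simple round-robin split by ``local_rank``, purely for illustration.

.. code-block:: cpp

   // Assumed round-robin partition of devices across the ranks on one node;
   // the real selection logic is not visible from the header and may differ.
   #include <set>

   std::set<int> devicesForThisRankSketch(const std::set<int> &devicesToSelectFrom,
                                          int local_size, int local_rank) {
       std::set<int> selected;
       int i = 0;
       for (int device : devicesToSelectFrom) {
           if (i % local_size == local_rank)
               selected.insert(device);  // Every local_size-th device goes to this rank
           ++i;
       }
       return selected;
   }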
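
``createParticipatingCommunicator`` implies that ranks left without a device are excluded from a dedicated communicator (``comm_participating``). A conventional way to build one is ``MPI_Comm_split``; the following sketch mirrors the member names from the header, but its body is an assumption, not the actual implementation.

.. code-block:: cpp

   // Assumed sketch: split MPI_COMM_WORLD into participating and
   // non-participating groups. Names mirror the header; the body is guesswork.
   #include <mpi.h>

   bool createParticipatingCommunicatorSketch(bool isParticipating,
                                              MPI_Comm *comm_participating,
                                              int *participating_rank,
                                              int *participating_size) {
       int world_rank = 0;
       MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
       // Ranks with the same colour end up in the same sub-communicator;
       // MPI_UNDEFINED excludes a rank from the result entirely.
       const int colour = isParticipating ? 0 : MPI_UNDEFINED;
       MPI_Comm_split(MPI_COMM_WORLD, colour, world_rank, comm_participating);
       if (isParticipating) {
           MPI_Comm_rank(*comm_participating, participating_rank);
           MPI_Comm_size(*comm_participating, participating_size);
       }
       return isParticipating;
   }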