XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_replay,smpi,"Trace Replay with SMPI");
-static int communicator_size = 0;
static int active_processes = 0;
static std::unordered_map<int, std::vector<MPI_Request>*> reqq;
static_cast<unsigned long>(optional)); \
}
-static void log_timed_action (simgrid::xbt::ReplayAction& action, double clock){
+/** Log, at verbose level, the tokens of the action (joined by single spaces) followed by the difference
+ *  between smpi_process()->simulated_elapsed() and `clock`.
+ *
+ *  The joined string is only built when verbose logging is enabled for the smpi_replay category. */
+static void log_timed_action(simgrid::xbt::ReplayAction& action, double clock)
+{
if (XBT_LOG_ISENABLED(smpi_replay, xbt_log_priority_verbose)){
std::string s = boost::algorithm::join(action, " ");
- XBT_VERB("%s %f", s.c_str(), smpi_process()->simulated_elapsed()-clock);
+ XBT_VERB("%s %f", s.c_str(), smpi_process()->simulated_elapsed() - clock);
}
}
namespace simgrid {
namespace smpi {
+namespace Replay {
+/** Base class of the argument parsers used by the replay actions.
+ *
+ * The default parse() is meant for actions without any argument: it only runs
+ * CHECK_ACTION_PARAMS(action, 0, 0). Parsers for actions with arguments override parse().
+ */
+class ActionArgParser {
+public:
+  /* This class is a polymorphic base, so its destructor must be virtual to keep deletion through a base pointer
+   * well-defined. */
+  virtual ~ActionArgParser() = default;
+
+  virtual void parse(simgrid::xbt::ReplayAction& action) { CHECK_ACTION_PARAMS(action, 0, 0) }
+};
+
+/** Argument parser for point-to-point actions.
+ *
+ * parse() reads action[2] as the partner, action[3] as the size and, if the action has more than four tokens,
+ * action[4] as an encoded datatype. Without that last token, datatype1 keeps its current value
+ * (MPI_DEFAULT_TYPE on a fresh object).
+ */
+class SendRecvParser : public ActionArgParser {
+public:
+  /* communication partner; if we send, this is the receiver and vice versa */
+  int partner = 0;
+  /* size of the message, as read from the trace (handed as the count to the Request calls) */
+  double size = 0.0;
+  MPI_Datatype datatype1 = MPI_DEFAULT_TYPE;
+
+  void parse(simgrid::xbt::ReplayAction& action) override
+  {
+    CHECK_ACTION_PARAMS(action, 2, 1)
+    partner = std::stoi(action[2]);
+    size    = parse_double(action[3]);
+    if (action.size() > 4)
+      datatype1 = simgrid::smpi::Datatype::decode(action[4]);
+  }
+};
+
+/** Argument parser for the compute action: reads action[2] as the amount of flops. */
+class ComputeParser : public ActionArgParser {
+public:
+  /* amount of computation to replay, as read from action[2] */
+  double flops = 0.0;
+
+  void parse(simgrid::xbt::ReplayAction& action) override
+  {
+    CHECK_ACTION_PARAMS(action, 1, 0)
+    flops = parse_double(action[2]);
+  }
+};
+
+/** Common skeleton of a replayed action.
+ *
+ * @tparam T type of the argument parser; it must be default-constructible and provide
+ *           parse(simgrid::xbt::ReplayAction&).
+ *
+ * execute() parses the arguments, runs the action-specific kernel() and finally logs the action through
+ * log_timed_action(). Subclasses only have to implement kernel().
+ */
+template <class T> class ReplayAction {
+protected:
+  const std::string name;
+  T args;
+
+  /* PID of the actor that created this object (taken from simgrid::s4u::Actor::self() in the constructor) */
+  int my_proc_id;
+
+public:
+  explicit ReplayAction(std::string name) : name(name), my_proc_id(simgrid::s4u::Actor::self()->getPid()) {}
+
+  /* Polymorphic base (kernel() is pure virtual): a virtual destructor keeps deletion through a base pointer
+   * well-defined. */
+  virtual ~ReplayAction() = default;
+
+  virtual void execute(simgrid::xbt::ReplayAction& action)
+  {
+    // The start time must be sampled for each executed action, before anything else is done, hence here
+    double start_time = smpi_process()->simulated_elapsed();
+    args.parse(action);
+    kernel(action);
+    log_timed_action(action, start_time);
+  }
+
+  /** Action-specific work; called by execute() once args has been filled by the parser. */
+  virtual void kernel(simgrid::xbt::ReplayAction& action) = 0;
+};
+
+/** Replays a "wait" action: waits for the most recently queued request of the calling process.
+ *
+ * The request is popped from the back of the vector returned by get_reqq_self(). A nullptr entry is skipped
+ * silently (see the comment in kernel()).
+ */
+class WaitAction : public ReplayAction<ActionArgParser> {
+public:
+  WaitAction() : ReplayAction("Wait") {}
+  void kernel(simgrid::xbt::ReplayAction& action) override
+  {
+    CHECK_ACTION_PARAMS(action, 0, 0)
+    MPI_Status status;
+
+    std::string s = boost::algorithm::join(action, " ");
+    xbt_assert(get_reqq_self()->size(), "action wait not preceded by any irecv or isend: %s", s.c_str());
+    MPI_Request request = get_reqq_self()->back();
+    get_reqq_self()->pop_back();
+
+    if (request == nullptr) {
+      /* Assume that the trace is well formed, meaning the comm might have been caught by a MPI_test. Then just
+       * return.*/
+      return;
+    }
+
+    int rank = request->comm() != MPI_COMM_NULL ? request->comm()->rank() : -1;
+
+    // Must be taken before Request::wait() since the request may be set to
+    // MPI_REQUEST_NULL by Request::wait!
+    // NOTE(review): request->comm() is dereferenced here without the MPI_COMM_NULL check done for rank above.
+    int src_traced           = request->comm()->group()->rank(request->src());
+    int dst_traced           = request->comm()->group()->rank(request->dst());
+    bool is_wait_for_receive = (request->flags() & RECV);
+    // TODO: Here we take the rank while we normally take the process id (look for my_proc_id)
+    TRACE_smpi_comm_in(rank, __FUNCTION__, new simgrid::instr::NoOpTIData("wait"));
+
+    Request::wait(&request, &status);
+
+    TRACE_smpi_comm_out(rank);
+    if (is_wait_for_receive)
+      TRACE_smpi_recv(src_traced, dst_traced, 0);
+  }
+};
+
+/** Replays a "send" or "Isend" action towards args.partner on MPI_COMM_WORLD.
+ *
+ * The variant is selected by the name given to the constructor; any other name aborts through xbt_die().
+ * For "Isend", the resulting request is appended to the vector returned by get_reqq_self().
+ */
+class SendAction : public ReplayAction<SendRecvParser> {
+public:
+  SendAction() = delete;
+  /* explicit: a string must never be silently converted into an action */
+  explicit SendAction(std::string name) : ReplayAction(name) {}
+  void kernel(simgrid::xbt::ReplayAction& action) override
+  {
+    int dst_traced = MPI_COMM_WORLD->group()->actor(args.partner)->getPid();
+
+    TRACE_smpi_comm_in(my_proc_id, __FUNCTION__, new simgrid::instr::Pt2PtTIData(name, args.partner, args.size,
+                                                                                 Datatype::encode(args.datatype1)));
+    if (not TRACE_smpi_view_internals())
+      TRACE_smpi_send(my_proc_id, my_proc_id, dst_traced, 0, args.size * args.datatype1->size());
+
+    if (name == "send") {
+      Request::send(nullptr, args.size, args.datatype1, args.partner, 0, MPI_COMM_WORLD);
+    } else if (name == "Isend") {
+      MPI_Request request = Request::isend(nullptr, args.size, args.datatype1, args.partner, 0, MPI_COMM_WORLD);
+      // Remember the request so that it can be retrieved later from the same vector
+      get_reqq_self()->push_back(request);
+    } else {
+      xbt_die("Don't know this action, %s", name.c_str());
+    }
+
+    TRACE_smpi_comm_out(my_proc_id);
+  }
+};
+
+/** Replays a "recv" or "Irecv" action from args.partner on MPI_COMM_WORLD.
+ *
+ * The variant is selected by the name given to the constructor; any other name aborts through xbt_die().
+ * When the parsed size is not strictly positive, the size is obtained from a probe before receiving.
+ * For "Irecv", the resulting request is appended to the vector returned by get_reqq_self().
+ */
+class RecvAction : public ReplayAction<SendRecvParser> {
+public:
+  RecvAction() = delete;
+  explicit RecvAction(std::string name) : ReplayAction(name) {}
+  void kernel(simgrid::xbt::ReplayAction& action) override
+  {
+    int src_traced = MPI_COMM_WORLD->group()->actor(args.partner)->getPid();
+
+    TRACE_smpi_comm_in(my_proc_id, __FUNCTION__, new simgrid::instr::Pt2PtTIData(name, args.partner, args.size,
+                                                                                 Datatype::encode(args.datatype1)));
+
+    MPI_Status status;
+    // unknown size from the receiver point of view
+    if (args.size <= 0.0) {
+      Request::probe(args.partner, 0, MPI_COMM_WORLD, &status);
+      args.size = status.count;
+    }
+
+    if (name == "recv") {
+      Request::recv(nullptr, args.size, args.datatype1, args.partner, 0, MPI_COMM_WORLD, &status);
+    } else if (name == "Irecv") {
+      MPI_Request request = Request::irecv(nullptr, args.size, args.datatype1, args.partner, 0, MPI_COMM_WORLD);
+      get_reqq_self()->push_back(request);
+    } else {
+      // Fail loudly on an unexpected name instead of silently receiving nothing
+      xbt_die("Don't know this action, %s", name.c_str());
+    }
+
+    TRACE_smpi_comm_out(my_proc_id);
+    // TODO: Check why this was only activated in the "recv" case and not in the "Irecv" case
+    if (name == "recv" && not TRACE_smpi_view_internals()) {
+      TRACE_smpi_recv(src_traced, my_proc_id, 0);
+    }
+  }
+};
+
+/** Replays a "compute" action: executes the parsed amount of flops, enclosed in the computing trace events. */
+class ComputeAction : public ReplayAction<ComputeParser> {
+public:
+  ComputeAction() : ReplayAction("compute") {}
+
+  void kernel(simgrid::xbt::ReplayAction& action) override
+  {
+    // Amount of work filled in by ComputeParser::parse() before kernel() is called
+    double amount = args.flops;
+
+    TRACE_smpi_computing_in(my_proc_id, amount);
+    smpi_execute_flops(amount);
+    TRACE_smpi_computing_out(my_proc_id);
+  }
+};
+
+/** Replays a "test" action on the most recently queued request of the calling process.
+ *
+ * The request is popped from the back of the vector returned by get_reqq_self(), tested, and pushed back so
+ * that a subsequent wait can retrieve it. A nullptr entry is dropped without being pushed back.
+ */
+class TestAction : public ReplayAction<ActionArgParser> {
+public:
+  TestAction() : ReplayAction("Test") {}
+  void kernel(simgrid::xbt::ReplayAction& action) override
+  {
+    // back()/pop_back() on an empty vector is undefined behavior: reject such traces explicitly, as the wait
+    // action does
+    xbt_assert(not get_reqq_self()->empty(), "action test not preceded by any irecv or isend: %s",
+               boost::algorithm::join(action, " ").c_str());
+    MPI_Request request = get_reqq_self()->back();
+    get_reqq_self()->pop_back();
+    // if request is null here, this may mean that a previous test has succeeded
+    // Different times in traced application and replayed version may lead to this
+    // In this case, ignore the extra calls.
+    if (request != nullptr) {
+      TRACE_smpi_testing_in(my_proc_id);
+
+      MPI_Status status;
+      int flag = Request::test(&request, &status);
+
+      XBT_DEBUG("MPI_Test result: %d", flag);
+      /* push back request in vector to be caught by a subsequent wait. if the test did succeed, the request is now
+       * nullptr.*/
+      get_reqq_self()->push_back(request);
+
+      TRACE_smpi_testing_out(my_proc_id);
+    }
+  }
+};
+
+} // Replay Namespace
+
static void action_init(simgrid::xbt::ReplayAction& action)
{
XBT_DEBUG("Initialize the counters");
static void action_comm_size(simgrid::xbt::ReplayAction& action)
{
- communicator_size = parse_double(action[2]);
log_timed_action (action, smpi_process()->simulated_elapsed());
}
log_timed_action (action, smpi_process()->simulated_elapsed());
}
-static void action_compute(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 1, 0)
- double clock = smpi_process()->simulated_elapsed();
- double flops= parse_double(action[2]);
- int my_proc_id = Actor::self()->getPid();
-
- TRACE_smpi_computing_in(my_proc_id, flops);
- smpi_execute_flops(flops);
- TRACE_smpi_computing_out(my_proc_id);
-
- log_timed_action (action, clock);
-}
-
-static void action_send(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 2, 1)
- int to = std::stoi(action[2]);
- double size=parse_double(action[3]);
- double clock = smpi_process()->simulated_elapsed();
-
- MPI_Datatype MPI_CURRENT_TYPE = (action.size() > 4) ? simgrid::smpi::Datatype::decode(action[4]) : MPI_DEFAULT_TYPE;
-
- int my_proc_id = Actor::self()->getPid();
- int dst_traced = MPI_COMM_WORLD->group()->actor(to)->getPid();
-
- TRACE_smpi_comm_in(my_proc_id, __FUNCTION__,
- new simgrid::instr::Pt2PtTIData("send", to, size, Datatype::encode(MPI_CURRENT_TYPE)));
- if (not TRACE_smpi_view_internals())
- TRACE_smpi_send(my_proc_id, my_proc_id, dst_traced, 0, size * MPI_CURRENT_TYPE->size());
-
- Request::send(nullptr, size, MPI_CURRENT_TYPE, to , 0, MPI_COMM_WORLD);
-
- TRACE_smpi_comm_out(my_proc_id);
-
- log_timed_action(action, clock);
-}
-
-static void action_Isend(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 2, 1)
- int to = std::stoi(action[2]);
- double size=parse_double(action[3]);
- double clock = smpi_process()->simulated_elapsed();
-
- MPI_Datatype MPI_CURRENT_TYPE = (action.size() > 4) ? simgrid::smpi::Datatype::decode(action[4]) : MPI_DEFAULT_TYPE;
-
- int my_proc_id = Actor::self()->getPid();
- int dst_traced = MPI_COMM_WORLD->group()->actor(to)->getPid();
- TRACE_smpi_comm_in(my_proc_id, __FUNCTION__,
- new simgrid::instr::Pt2PtTIData("Isend", to, size, Datatype::encode(MPI_CURRENT_TYPE)));
- if (not TRACE_smpi_view_internals())
- TRACE_smpi_send(my_proc_id, my_proc_id, dst_traced, 0, size * MPI_CURRENT_TYPE->size());
-
- MPI_Request request = Request::isend(nullptr, size, MPI_CURRENT_TYPE, to, 0, MPI_COMM_WORLD);
-
- TRACE_smpi_comm_out(my_proc_id);
-
- get_reqq_self()->push_back(request);
-
- log_timed_action (action, clock);
-}
-
-static void action_recv(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 2, 1)
- int from = std::stoi(action[2]);
- double size=parse_double(action[3]);
- double clock = smpi_process()->simulated_elapsed();
- MPI_Status status;
-
- MPI_Datatype MPI_CURRENT_TYPE = (action.size() > 4) ? simgrid::smpi::Datatype::decode(action[4]) : MPI_DEFAULT_TYPE;
-
- int my_proc_id = Actor::self()->getPid();
- int src_traced = MPI_COMM_WORLD->group()->actor(from)->getPid();
-
- TRACE_smpi_comm_in(my_proc_id, __FUNCTION__,
- new simgrid::instr::Pt2PtTIData("recv", from, size, Datatype::encode(MPI_CURRENT_TYPE)));
-
- //unknown size from the receiver point of view
- if (size <= 0.0) {
- Request::probe(from, 0, MPI_COMM_WORLD, &status);
- size=status.count;
- }
-
- Request::recv(nullptr, size, MPI_CURRENT_TYPE, from, 0, MPI_COMM_WORLD, &status);
-
- TRACE_smpi_comm_out(my_proc_id);
- if (not TRACE_smpi_view_internals()) {
- TRACE_smpi_recv(src_traced, my_proc_id, 0);
- }
-
- log_timed_action (action, clock);
-}
-
-static void action_Irecv(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 2, 1)
- int from = std::stoi(action[2]);
- double size=parse_double(action[3]);
- double clock = smpi_process()->simulated_elapsed();
-
- MPI_Datatype MPI_CURRENT_TYPE = (action.size() > 4) ? simgrid::smpi::Datatype::decode(action[4]) : MPI_DEFAULT_TYPE;
-
- int my_proc_id = Actor::self()->getPid();
- TRACE_smpi_comm_in(my_proc_id, __FUNCTION__,
- new simgrid::instr::Pt2PtTIData("Irecv", from, size, Datatype::encode(MPI_CURRENT_TYPE)));
- MPI_Status status;
- //unknow size from the receiver pov
- if (size <= 0.0) {
- Request::probe(from, 0, MPI_COMM_WORLD, &status);
- size = status.count;
- }
-
- MPI_Request request = Request::irecv(nullptr, size, MPI_CURRENT_TYPE, from, 0, MPI_COMM_WORLD);
-
- TRACE_smpi_comm_out(my_proc_id);
- get_reqq_self()->push_back(request);
-
- log_timed_action (action, clock);
-}
-
-static void action_test(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 0, 0)
- double clock = smpi_process()->simulated_elapsed();
- MPI_Status status;
-
- MPI_Request request = get_reqq_self()->back();
- get_reqq_self()->pop_back();
- //if request is null here, this may mean that a previous test has succeeded
- //Different times in traced application and replayed version may lead to this
- //In this case, ignore the extra calls.
- if(request!=nullptr){
- int my_proc_id = Actor::self()->getPid();
- TRACE_smpi_testing_in(my_proc_id);
-
- int flag = Request::test(&request, &status);
-
- XBT_DEBUG("MPI_Test result: %d", flag);
- /* push back request in vector to be caught by a subsequent wait. if the test did succeed, the request is now nullptr.*/
- get_reqq_self()->push_back(request);
-
- TRACE_smpi_testing_out(my_proc_id);
- }
- log_timed_action (action, clock);
-}
-
-static void action_wait(simgrid::xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 0, 0)
- double clock = smpi_process()->simulated_elapsed();
- MPI_Status status;
-
- std::string s = boost::algorithm::join(action, " ");
- xbt_assert(get_reqq_self()->size(), "action wait not preceded by any irecv or isend: %s", s.c_str());
- MPI_Request request = get_reqq_self()->back();
- get_reqq_self()->pop_back();
-
- if (request==nullptr){
- /* Assume that the trace is well formed, meaning the comm might have been caught by a MPI_test. Then just return.*/
- return;
- }
-
- int rank = request->comm() != MPI_COMM_NULL ? request->comm()->rank() : -1;
-
- MPI_Group group = request->comm()->group();
- int src_traced = group->rank(request->src());
- int dst_traced = group->rank(request->dst());
- int is_wait_for_receive = (request->flags() & RECV);
- TRACE_smpi_comm_in(rank, __FUNCTION__, new simgrid::instr::NoOpTIData("wait"));
-
- Request::wait(&request, &status);
-
- TRACE_smpi_comm_out(rank);
- if (is_wait_for_receive)
- TRACE_smpi_recv(src_traced, dst_traced, 0);
- log_timed_action (action, clock);
-}
-
static void action_waitall(simgrid::xbt::ReplayAction& action)
{
CHECK_ACTION_PARAMS(action, 0, 0)
xbt_replay_action_register("comm_size", simgrid::smpi::action_comm_size);
xbt_replay_action_register("comm_split", simgrid::smpi::action_comm_split);
xbt_replay_action_register("comm_dup", simgrid::smpi::action_comm_dup);
- xbt_replay_action_register("send", simgrid::smpi::action_send);
- xbt_replay_action_register("Isend", simgrid::smpi::action_Isend);
- xbt_replay_action_register("recv", simgrid::smpi::action_recv);
- xbt_replay_action_register("Irecv", simgrid::smpi::action_Irecv);
- xbt_replay_action_register("test", simgrid::smpi::action_test);
- xbt_replay_action_register("wait", simgrid::smpi::action_wait);
+
+ xbt_replay_action_register("send", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::SendAction("send").execute(action); });
+ xbt_replay_action_register("Isend", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::SendAction("Isend").execute(action); });
+ xbt_replay_action_register("recv", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::RecvAction("recv").execute(action); });
+ xbt_replay_action_register("Irecv", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::RecvAction("Irecv").execute(action); });
+ xbt_replay_action_register("test", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::TestAction().execute(action); });
+ xbt_replay_action_register("wait", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::WaitAction().execute(action); });
xbt_replay_action_register("waitAll", simgrid::smpi::action_waitall);
xbt_replay_action_register("barrier", simgrid::smpi::action_barrier);
xbt_replay_action_register("bcast", simgrid::smpi::action_bcast);
xbt_replay_action_register("allGather", simgrid::smpi::action_allgather);
xbt_replay_action_register("allGatherV", simgrid::smpi::action_allgatherv);
xbt_replay_action_register("reduceScatter", simgrid::smpi::action_reducescatter);
- xbt_replay_action_register("compute", simgrid::smpi::action_compute);
+ xbt_replay_action_register("compute", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::Replay::ComputeAction().execute(action); });
//if we have a delayed start, sleep here.
if(*argc>2){