X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/aba5e7d3fbc0cd23b5a212c2ab398c1b30a5fc0c..HEAD:/src/smpi/internals/smpi_replay.cpp diff --git a/src/smpi/internals/smpi_replay.cpp b/src/smpi/internals/smpi_replay.cpp index c94b93c13a..701323fb28 100644 --- a/src/smpi/internals/smpi_replay.cpp +++ b/src/smpi/internals/smpi_replay.cpp @@ -1,18 +1,19 @@ -/* Copyright (c) 2009-2021. The SimGrid Team. All rights reserved. */ +/* Copyright (c) 2009-2023. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ +#include "smpi_replay.hpp" +#include "simgrid/s4u/Exec.hpp" #include "smpi_coll.hpp" #include "smpi_comm.hpp" +#include "smpi_config.hpp" #include "smpi_datatype.hpp" #include "smpi_group.hpp" #include "smpi_request.hpp" -#include "smpi_config.hpp" -#include "simgrid/s4u/Exec.hpp" +#include "src/smpi/include/private.hpp" #include "xbt/replay.hpp" -#include -#include +#include "xbt/str.h" #include #include @@ -23,7 +24,6 @@ #include XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_replay, smpi, "Trace Replay with SMPI"); - // From https://stackoverflow.com/questions/7110301/generic-hash-for-tuples-in-unordered-map-unordered-set // This is all just to make std::unordered_map work with std::tuple. If we need this in other places, // this could go into a header file. @@ -39,7 +39,7 @@ template inline void hash_combine(std::size_t& seed, T const& v) } // Recursive template code derived from Matthieu M. -template ::value - 1> class HashValueImpl { +template - 1> class HashValueImpl { public: static void apply(size_t& seed, Tuple const& tuple) { @@ -97,16 +97,13 @@ static MPI_Datatype parse_datatype(const simgrid::xbt::ReplayAction& action, uns return i < action.size() ? simgrid::smpi::Datatype::decode(action[i]) : simgrid::smpi::replay::MPI_DEFAULT_TYPE; } -namespace simgrid { -namespace smpi { - -namespace replay { +namespace simgrid::smpi::replay { MPI_Datatype MPI_DEFAULT_TYPE; class RequestStorage { private: using req_key_t = std::tuple; - using req_storage_t = std::unordered_map>>; + using req_storage_t = std::unordered_map, hash_tuple::hash>>; req_storage_t store; @@ -118,41 +115,43 @@ public: void get_requests(std::vector& vec) const { - for (auto const& pair : store) { - auto& req = pair.second; + for (auto const& [_, reqs] : store) { aid_t my_proc_id = simgrid::s4u::this_actor::get_pid(); - if (req != MPI_REQUEST_NULL && (req->src() == my_proc_id || req->dst() == my_proc_id)) { - vec.push_back(pair.second); - pair.second->print_request("MM"); + for (const auto& req : reqs) { + if (req != MPI_REQUEST_NULL && (req->src() == my_proc_id || req->dst() == my_proc_id)) { + vec.push_back(req); + req->print_request("MM"); + } } } } - MPI_Request find(int src, int dst, int tag) - { - auto it = store.find(req_key_t(src, dst, tag)); - return (it == store.end()) ? MPI_REQUEST_NULL : it->second; - } - - void remove(const Request* req) - { - if (req == MPI_REQUEST_NULL) return; - - store.erase(req_key_t(req->src()-1, req->dst()-1, req->tag())); - } + MPI_Request pop(int src, int dst, int tag) + { + auto it = store.find(req_key_t(src, dst, tag)); + if (it == store.end()) + return MPI_REQUEST_NULL; + MPI_Request req = it->second.front(); + it->second.pop_front(); + if(it->second.empty()) + store.erase(req_key_t(src, dst, tag)); + return req; + } - void add(MPI_Request req) - { - if (req != MPI_REQUEST_NULL) // Can and does happen in the case of TestAction - store.insert({req_key_t(req->src()-1, req->dst()-1, req->tag()), req}); + void add(MPI_Request req) + { + if (req != MPI_REQUEST_NULL){ // Can and does happen in the case of TestAction + store[req_key_t(req->src()-1, req->dst()-1, req->tag())].push_back(req); } + } - /* Sometimes we need to re-insert MPI_REQUEST_NULL but we still need src,dst and tag */ - void addNullRequest(int src, int dst, int tag) - { - store.insert({req_key_t(MPI_COMM_WORLD->group()->actor(src) - 1, MPI_COMM_WORLD->group()->actor(dst) - 1, tag), - MPI_REQUEST_NULL}); - } + /* Sometimes we need to re-insert MPI_REQUEST_NULL but we still need src,dst and tag */ + void addNullRequest(int src, int dst, int tag) + { + int src_pid = MPI_COMM_WORLD->group()->actor(src) - 1; + int dest_pid = MPI_COMM_WORLD->group()->actor(dst) - 1; + store[req_key_t(src_pid, dest_pid, tag)].push_back(MPI_REQUEST_NULL); + } }; void WaitTestParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) @@ -163,12 +162,12 @@ void WaitTestParser::parse(simgrid::xbt::ReplayAction& action, const std::string tag = std::stoi(action[4]); } -void SendRecvParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) +void SendOrRecvParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) { CHECK_ACTION_PARAMS(action, 3, 1) partner = std::stoi(action[2]); tag = std::stoi(action[3]); - size = parse_integer(action[4]); + size = parse_integer(action[4]); datatype1 = parse_datatype(action, 5); } @@ -187,10 +186,21 @@ void SleepParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) void LocationParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) { CHECK_ACTION_PARAMS(action, 2, 0) - filename = std::string(action[2]); + filename = action[2]; line = std::stoi(action[3]); } +void SendRecvParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) +{ + CHECK_ACTION_PARAMS(action, 6, 0) + sendcount = parse_integer(action[2]); + dst = std::stoi(action[3]); + recvcount = parse_integer(action[4]); + src = std::stoi(action[5]); + datatype1 = parse_datatype(action, 6); + datatype2 = parse_datatype(action, 7); +} + void BcastArgParser::parse(simgrid::xbt::ReplayAction& action, const std::string&) { CHECK_ACTION_PARAMS(action, 1, 2) @@ -421,8 +431,7 @@ void WaitAction::kernel(simgrid::xbt::ReplayAction& action) std::string s = boost::algorithm::join(action, " "); xbt_assert(req_storage.size(), "action wait not preceded by any irecv or isend: %s", s.c_str()); const WaitTestParser& args = get_args(); - MPI_Request request = req_storage.find(args.src, args.dst, args.tag); - req_storage.remove(request); + MPI_Request request = req_storage.pop(args.src, args.dst, args.tag); if (request == MPI_REQUEST_NULL) { /* Assume that the trace is well formed, meaning the comm might have been caught by an MPI_test. Then just @@ -434,11 +443,12 @@ void WaitAction::kernel(simgrid::xbt::ReplayAction& action) // MPI_REQUEST_NULL by Request::wait! bool is_wait_for_receive = (request->flags() & MPI_REQ_RECV); - TRACE_smpi_comm_in(get_pid(), __func__, new simgrid::instr::WaitTIData(args.src, args.dst, args.tag)); + TRACE_smpi_comm_in(get_pid(), __func__, new simgrid::instr::WaitTIData("wait", args.src, args.dst, args.tag)); MPI_Status status; Request::wait(&request, &status); - + if(request!=MPI_REQUEST_NULL) + Request::unref(&request); TRACE_smpi_comm_out(get_pid()); if (is_wait_for_receive) TRACE_smpi_recv(MPI_COMM_WORLD->group()->actor(args.src), MPI_COMM_WORLD->group()->actor(args.dst), args.tag); @@ -446,7 +456,7 @@ void WaitAction::kernel(simgrid::xbt::ReplayAction& action) void SendAction::kernel(simgrid::xbt::ReplayAction&) { - const SendRecvParser& args = get_args(); + const SendOrRecvParser& args = get_args(); aid_t dst_traced = MPI_COMM_WORLD->group()->actor(args.partner); TRACE_smpi_comm_in( @@ -469,15 +479,15 @@ void SendAction::kernel(simgrid::xbt::ReplayAction&) void RecvAction::kernel(simgrid::xbt::ReplayAction&) { - const SendRecvParser& args = get_args(); + const SendOrRecvParser& args = get_args(); TRACE_smpi_comm_in( get_pid(), __func__, new simgrid::instr::Pt2PtTIData(get_name(), args.partner, args.size, args.tag, Datatype::encode(args.datatype1))); MPI_Status status; // unknown size from the receiver point of view - size_t arg_size = args.size; - if (arg_size == 0) { + ssize_t arg_size = args.size; + if (arg_size < 0) { Request::probe(args.partner, args.tag, MPI_COMM_WORLD, &status); arg_size = status.count; } @@ -500,6 +510,39 @@ void RecvAction::kernel(simgrid::xbt::ReplayAction&) } } +void SendRecvAction::kernel(simgrid::xbt::ReplayAction&) +{ + XBT_DEBUG("Enters SendRecv"); + const SendRecvParser& args = get_args(); + aid_t my_proc_id = simgrid::s4u::this_actor::get_pid(); + aid_t src_traced = MPI_COMM_WORLD->group()->actor(args.src); + aid_t dst_traced = MPI_COMM_WORLD->group()->actor(args.dst); + + MPI_Status status; + int sendtag=0; + int recvtag=0; + + // FIXME: Hack the way to trace this one + auto dst_hack = std::make_shared>(); + auto src_hack = std::make_shared>(); + dst_hack->push_back(dst_traced); + src_hack->push_back(src_traced); + TRACE_smpi_comm_in(my_proc_id, __func__, + new simgrid::instr::VarCollTIData( + "sendRecv", -1, args.sendcount, + dst_hack, args.recvcount, src_hack, + simgrid::smpi::Datatype::encode(args.datatype1), simgrid::smpi::Datatype::encode(args.datatype2))); + + TRACE_smpi_send(my_proc_id, my_proc_id, dst_traced, sendtag, args.sendcount * args.datatype1->size()); + + simgrid::smpi::Request::sendrecv(nullptr, args.sendcount, args.datatype1, args.dst, sendtag, nullptr, args.recvcount, args.datatype2, args.src, + recvtag, MPI_COMM_WORLD, &status); + + TRACE_smpi_recv(src_traced, my_proc_id, recvtag); + TRACE_smpi_comm_out(my_proc_id); + XBT_DEBUG("Exits SendRecv"); +} + void ComputeAction::kernel(simgrid::xbt::ReplayAction&) { const ComputeParser& args = get_args(); @@ -521,19 +564,18 @@ void SleepAction::kernel(simgrid::xbt::ReplayAction&) void LocationAction::kernel(simgrid::xbt::ReplayAction&) { const LocationParser& args = get_args(); - smpi_trace_set_call_location(args.filename.c_str(), args.line); + smpi_trace_set_call_location(args.filename.c_str(), args.line, "replay_action"); } void TestAction::kernel(simgrid::xbt::ReplayAction&) { const WaitTestParser& args = get_args(); - MPI_Request request = req_storage.find(args.src, args.dst, args.tag); - req_storage.remove(request); + MPI_Request request = req_storage.pop(args.src, args.dst, args.tag); // if request is null here, this may mean that a previous test has succeeded // Different times in traced application and replayed version may lead to this // In this case, ignore the extra calls. if (request != MPI_REQUEST_NULL) { - TRACE_smpi_comm_in(get_pid(), __func__, new simgrid::instr::NoOpTIData("test")); + TRACE_smpi_comm_in(get_pid(), __func__, new simgrid::instr::WaitTIData("test", args.src, args.dst, args.tag)); MPI_Status status; int flag = 0; @@ -568,13 +610,12 @@ void CommunicatorAction::kernel(simgrid::xbt::ReplayAction&) void WaitAllAction::kernel(simgrid::xbt::ReplayAction&) { - const size_t count_requests = req_storage.size(); - - if (count_requests > 0) { - TRACE_smpi_comm_in(get_pid(), __func__, new simgrid::instr::CpuTIData("waitall", count_requests)); + if (req_storage.size() > 0) { std::vector> sender_receiver; std::vector reqs; req_storage.get_requests(reqs); + unsigned long count_requests = reqs.size(); + TRACE_smpi_comm_in(get_pid(), __func__, new simgrid::instr::CpuTIData("waitall", count_requests)); for (auto const& req : reqs) { if (req && (req->flags() & MPI_REQ_RECV)) { sender_receiver.emplace_back(req->src(), req->dst()); @@ -583,8 +624,12 @@ void WaitAllAction::kernel(simgrid::xbt::ReplayAction&) Request::waitall(count_requests, &(reqs.data())[0], MPI_STATUSES_IGNORE); req_storage.get_store().clear(); - for (auto const& pair : sender_receiver) { - TRACE_smpi_recv(pair.first, pair.second, 0); + for (MPI_Request& req : reqs) + if (req != MPI_REQUEST_NULL) + Request::unref(&req); + + for (auto const& [src, dst] : sender_receiver) { + TRACE_smpi_recv(src, dst, 0); } TRACE_smpi_comm_out(get_pid()); } @@ -744,7 +789,9 @@ void ReduceScatterAction::kernel(simgrid::xbt::ReplayAction&) TRACE_smpi_comm_in( get_pid(), "action_reducescatter", new simgrid::instr::VarCollTIData(get_name(), -1, -1, nullptr, -1, args.recvcounts, - std::to_string(args.comp_size), + /* ugly as we use datatype field to pass computation as string */ + /* and because of the trick to avoid getting 0.000000 when 0 is given */ + args.comp_size == 0 ? "0" : std::to_string(args.comp_size), Datatype::encode(args.datatype1))); colls::reduce_scatter(send_buffer(args.recv_size_sum * args.datatype1->size()), @@ -795,8 +842,7 @@ void AllToAllVAction::kernel(simgrid::xbt::ReplayAction&) TRACE_smpi_comm_out(get_pid()); } -} // Replay Namespace -}} // namespace simgrid::smpi +} // namespace simgrid::smpi::replay static std::unordered_map storage; /** @brief Only initialize the replay, don't do it for real */ @@ -822,6 +868,7 @@ void smpi_replay_init(const char* instance_id, int rank, double start_delay_flop xbt_replay_action_register("recv", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::RecvAction("recv", storage[simgrid::s4u::this_actor::get_pid()]).execute(action); }); xbt_replay_action_register("irecv", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::RecvAction("irecv", storage[simgrid::s4u::this_actor::get_pid()]).execute(action); }); xbt_replay_action_register("test", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::TestAction(storage[simgrid::s4u::this_actor::get_pid()]).execute(action); }); + xbt_replay_action_register("sendRecv", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::SendRecvAction().execute(action); }); xbt_replay_action_register("wait", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::WaitAction(storage[simgrid::s4u::this_actor::get_pid()]).execute(action); }); xbt_replay_action_register("waitall", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::WaitAllAction(storage[simgrid::s4u::this_actor::get_pid()]).execute(action); }); xbt_replay_action_register("barrier", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::BarrierAction().execute(action); }); @@ -872,14 +919,16 @@ void smpi_replay_main(int rank, const char* private_trace_filename) std::vector requests(count_requests); unsigned int i=0; - for (auto const& pair : storage[simgrid::s4u::this_actor::get_pid()].get_store()) { - requests[i] = pair.second; + for (auto const& [_, reqs] : storage[simgrid::s4u::this_actor::get_pid()].get_store()) { + for (const auto& req : reqs) { + requests[i] = req; // FIXME: overwritten at each iteration? + } i++; } simgrid::smpi::Request::waitall(count_requests, requests.data(), MPI_STATUSES_IGNORE); } - if(simgrid::config::get_value("smpi/finalization-barrier")) + if (simgrid::config::get_value("smpi/barrier-finalization")) simgrid::smpi::colls::barrier(MPI_COMM_WORLD); active_processes--;