Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Update copyright lines for 2023.
[simgrid.git] / src / s4u / s4u_Comm.cpp
1 /* Copyright (c) 2006-2023. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include <cmath>
7 #include <simgrid/Exception.hpp>
8 #include <simgrid/comm.h>
9 #include <simgrid/s4u/Comm.hpp>
10 #include <simgrid/s4u/Engine.hpp>
11 #include <simgrid/s4u/Mailbox.hpp>
12
13 #include "mc/mc.h"
14 #include "src/kernel/activity/CommImpl.hpp"
15 #include "src/kernel/actor/ActorImpl.hpp"
16 #include "src/kernel/actor/SimcallObserver.hpp"
17 #include "src/mc/mc_replay.hpp"
18
19 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(s4u_comm, s4u_activity, "S4U asynchronous communications");
20
21 namespace simgrid::s4u {
22 xbt::signal<void(Comm const&)> Comm::on_send;
23 xbt::signal<void(Comm const&)> Comm::on_recv;
24
25 CommPtr Comm::set_copy_data_callback(const std::function<void(kernel::activity::CommImpl*, void*, size_t)>& callback)
26 {
27   copy_data_function_ = callback;
28   return this;
29 }
30
31 void Comm::copy_buffer_callback(kernel::activity::CommImpl* comm, void* buff, size_t buff_size)
32 {
33   XBT_DEBUG("Copy the data over");
34   memcpy(comm->dst_buff_, buff, buff_size);
35   if (comm->is_detached()) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the
36                              // original buffer available to the application ASAP
37     xbt_free(buff);
38     comm->src_buff_ = nullptr;
39   }
40 }
41
42 void Comm::copy_pointer_callback(kernel::activity::CommImpl* comm, void* buff, size_t buff_size)
43 {
44   xbt_assert((buff_size == sizeof(void*)), "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
45   *(void**)(comm->dst_buff_) = buff;
46 }
47
48 Comm::~Comm()
49 {
50   if (state_ == State::STARTED && not detached_ &&
51       (pimpl_ == nullptr || pimpl_->get_state() == kernel::activity::State::RUNNING)) {
52     XBT_INFO("Comm %p freed before its completion. Did you forget to detach it? (state: %s)", this, get_state_str());
53     if (pimpl_ != nullptr)
54       XBT_INFO("pimpl_->state: %s", pimpl_->get_state_str());
55     else
56       XBT_INFO("pimpl_ is null");
57     xbt_backtrace_display_current();
58   }
59 }
60
61 void Comm::send(kernel::actor::ActorImpl* sender, const Mailbox* mbox, double task_size, double rate, void* src_buff,
62                 size_t src_buff_size,
63                 const std::function<bool(void*, void*, simgrid::kernel::activity::CommImpl*)>& match_fun,
64                 const std::function<void(simgrid::kernel::activity::CommImpl*, void*, size_t)>& copy_data_fun,
65                 void* data, double timeout)
66 {
67   /* checking for infinite values */
68   xbt_assert(std::isfinite(task_size), "task_size is not finite!");
69   xbt_assert(std::isfinite(rate), "rate is not finite!");
70   xbt_assert(std::isfinite(timeout), "timeout is not finite!");
71
72   xbt_assert(mbox, "No rendez-vous point defined for send");
73
74   if (MC_is_active() || MC_record_replay_is_active()) {
75     /* the model-checker wants two separate simcalls, and wants comm to be nullptr during the simcall */
76     simgrid::kernel::activity::ActivityImplPtr comm = nullptr;
77
78     simgrid::kernel::actor::CommIsendSimcall send_observer{
79         sender,  mbox->get_impl(), task_size, rate, static_cast<unsigned char*>(src_buff), src_buff_size, match_fun,
80         nullptr, copy_data_fun,    data,      false};
81     comm = simgrid::kernel::actor::simcall_answered(
82         [&send_observer] { return simgrid::kernel::activity::CommImpl::isend(&send_observer); }, &send_observer);
83
84     if (simgrid::kernel::actor::ActivityWaitSimcall wait_observer{sender, comm.get(), timeout};
85         simgrid::kernel::actor::simcall_blocking(
86             [&wait_observer] {
87               wait_observer.get_activity()->wait_for(wait_observer.get_issuer(), wait_observer.get_timeout());
88             },
89             &wait_observer)) {
90       throw simgrid::TimeoutException(XBT_THROW_POINT, "Timeouted");
91     }
92     comm = nullptr;
93   } else {
94     simgrid::kernel::actor::CommIsendSimcall observer(sender, mbox->get_impl(), task_size, rate,
95                                                       static_cast<unsigned char*>(src_buff), src_buff_size, match_fun,
96                                                       nullptr, copy_data_fun, data, false);
97     simgrid::kernel::actor::simcall_blocking([&observer, timeout] {
98       simgrid::kernel::activity::ActivityImplPtr comm = simgrid::kernel::activity::CommImpl::isend(&observer);
99       comm->wait_for(observer.get_issuer(), timeout);
100     });
101   }
102 }
103
104 void Comm::recv(kernel::actor::ActorImpl* receiver, const Mailbox* mbox, void* dst_buff, size_t* dst_buff_size,
105                 const std::function<bool(void*, void*, simgrid::kernel::activity::CommImpl*)>& match_fun,
106                 const std::function<void(simgrid::kernel::activity::CommImpl*, void*, size_t)>& copy_data_fun,
107                 void* data, double timeout, double rate)
108 {
109   xbt_assert(std::isfinite(timeout), "timeout is not finite!");
110   xbt_assert(mbox, "No rendez-vous point defined for recv");
111
112   if (MC_is_active() || MC_record_replay_is_active()) {
113     /* the model-checker wants two separate simcalls, and wants comm to be nullptr during the simcall */
114     simgrid::kernel::activity::ActivityImplPtr comm = nullptr;
115
116     simgrid::kernel::actor::CommIrecvSimcall observer{receiver,
117                                                       mbox->get_impl(),
118                                                       static_cast<unsigned char*>(dst_buff),
119                                                       dst_buff_size,
120                                                       match_fun,
121                                                       copy_data_fun,
122                                                       data,
123                                                       rate};
124     comm = simgrid::kernel::actor::simcall_answered(
125         [&observer] { return simgrid::kernel::activity::CommImpl::irecv(&observer); }, &observer);
126
127     if (simgrid::kernel::actor::ActivityWaitSimcall wait_observer{receiver, comm.get(), timeout};
128         simgrid::kernel::actor::simcall_blocking(
129             [&wait_observer] {
130               wait_observer.get_activity()->wait_for(wait_observer.get_issuer(), wait_observer.get_timeout());
131             },
132             &wait_observer)) {
133       throw simgrid::TimeoutException(XBT_THROW_POINT, "Timeouted");
134     }
135     comm = nullptr;
136   } else {
137     simgrid::kernel::actor::CommIrecvSimcall observer(receiver, mbox->get_impl(), static_cast<unsigned char*>(dst_buff),
138                                                       dst_buff_size, match_fun, copy_data_fun, data, rate);
139     simgrid::kernel::actor::simcall_blocking([&observer, timeout] {
140       simgrid::kernel::activity::ActivityImplPtr comm = simgrid::kernel::activity::CommImpl::irecv(&observer);
141       comm->wait_for(observer.get_issuer(), timeout);
142     });
143   }
144 }
145
146 CommPtr Comm::sendto_init()
147 {
148   CommPtr res(new Comm());
149   res->pimpl_ = kernel::activity::CommImplPtr(new kernel::activity::CommImpl());
150   boost::static_pointer_cast<kernel::activity::CommImpl>(res->pimpl_)->detach();
151   res->sender_ = kernel::actor::ActorImpl::self();
152   return res;
153 }
154
155 CommPtr Comm::sendto_init(Host* from, Host* to)
156 {
157   auto res = Comm::sendto_init()->set_source(from)->set_destination(to);
158   res->set_state(State::STARTING);
159   return res;
160 }
161
162 CommPtr Comm::sendto_async(Host* from, Host* to, uint64_t simulated_size_in_bytes)
163 {
164   return Comm::sendto_init()->set_payload_size(simulated_size_in_bytes)->set_source(from)->set_destination(to);
165 }
166
167 void Comm::sendto(Host* from, Host* to, uint64_t simulated_size_in_bytes)
168 {
169   sendto_async(from, to, simulated_size_in_bytes)->wait();
170 }
171
172 CommPtr Comm::set_source(Host* from)
173 {
174   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
175              "Cannot change the source of a Comm once it's started (state: %s)", to_c_str(state_));
176   boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_source(from);
177   // Setting 'source' may allow to start the activity, let's try
178   if (state_ == State::STARTING && remains_ <= 0)
179     XBT_DEBUG("This communication has a payload size of 0 byte. It cannot start yet");
180   else
181     vetoable_start();
182
183   return this;
184 }
185 Host* Comm::get_source() const
186 {
187   return pimpl_ ? boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->get_source() : nullptr;
188 }
189
190 CommPtr Comm::set_destination(Host* to)
191 {
192   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
193              "Cannot change the destination of a Comm once it's started (state: %s)", to_c_str(state_));
194   boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_destination(to);
195   // Setting 'destination' may allow to start the activity, let's try
196   if (state_ == State::STARTING && remains_ <= 0)
197     XBT_DEBUG("This communication has a payload size of 0 byte. It cannot start yet");
198   else
199     vetoable_start();
200
201   return this;
202 }
203
204 Host* Comm::get_destination() const
205 {
206   return pimpl_ ? boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->get_destination() : nullptr;
207 }
208
209 CommPtr Comm::set_rate(double rate)
210 {
211   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
212              __FUNCTION__);
213   rate_ = rate;
214   return this;
215 }
216
217 CommPtr Comm::set_mailbox(Mailbox* mailbox)
218 {
219   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
220              __FUNCTION__);
221   mailbox_ = mailbox;
222   return this;
223 }
224
225 CommPtr Comm::set_src_data(void* buff)
226 {
227   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
228              __FUNCTION__);
229   xbt_assert(dst_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
230   src_buff_ = buff;
231   return this;
232 }
233
234 CommPtr Comm::set_src_data_size(size_t size)
235 {
236   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
237              __FUNCTION__);
238   src_buff_size_ = size;
239   return this;
240 }
241
242 CommPtr Comm::set_src_data(void* buff, size_t size)
243 {
244   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
245              __FUNCTION__);
246
247   xbt_assert(dst_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
248   src_buff_      = buff;
249   src_buff_size_ = size;
250   return this;
251 }
252
253 CommPtr Comm::set_dst_data(void** buff)
254 {
255   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
256              __FUNCTION__);
257   xbt_assert(src_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
258   dst_buff_ = buff;
259   return this;
260 }
261
262 CommPtr Comm::set_dst_data(void** buff, size_t size)
263 {
264   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
265              __FUNCTION__);
266
267   xbt_assert(src_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
268   dst_buff_      = buff;
269   dst_buff_size_ = size;
270   return this;
271 }
272
273 CommPtr Comm::set_payload_size(uint64_t bytes)
274 {
275   set_remaining(bytes);
276   if (pimpl_) {
277     boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_size(bytes);
278   }
279   return this;
280 }
281
282 Actor* Comm::get_sender() const
283 {
284   kernel::actor::ActorImplPtr sender = nullptr;
285   if (pimpl_)
286     sender = boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->src_actor_;
287   return sender ? sender->get_ciface() : nullptr;
288 }
289
290 bool Comm::is_assigned() const
291 {
292   return (pimpl_ && boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->is_assigned()) ||
293          mailbox_ != nullptr;
294 }
295
296 Comm* Comm::start()
297 {
298   xbt_assert(get_state() == State::INITED || get_state() == State::STARTING,
299              "You cannot use %s() once your communication started (not implemented)", __FUNCTION__);
300   if (get_source() != nullptr || get_destination() != nullptr) {
301     xbt_assert(is_assigned(), "When either from_ or to_ is specified, both must be.");
302     xbt_assert(src_buff_ == nullptr && dst_buff_ == nullptr,
303                "Direct host-to-host communications cannot carry any data.");
304     XBT_DEBUG("host-to-host Comm. Pimpl already created and set, just start it.");
305     kernel::actor::simcall_answered([this] {
306       pimpl_->set_state(kernel::activity::State::READY);
307       boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->start();
308     });
309   } else if (src_buff_ != nullptr) { // Sender side
310     on_send(*this);
311     kernel::actor::CommIsendSimcall observer{sender_,
312                                              mailbox_->get_impl(),
313                                              remains_,
314                                              rate_,
315                                              static_cast<unsigned char*>(src_buff_),
316                                              src_buff_size_,
317                                              match_fun_,
318                                              clean_fun_,
319                                              copy_data_function_,
320                                              get_data<void>(),
321                                              detached_};
322     pimpl_ = kernel::actor::simcall_answered([&observer] { return kernel::activity::CommImpl::isend(&observer); },
323                                              &observer);
324   } else if (dst_buff_ != nullptr) { // Receiver side
325     xbt_assert(not detached_, "Receive cannot be detached");
326     on_recv(*this);
327     kernel::actor::CommIrecvSimcall observer{receiver_,
328                                              mailbox_->get_impl(),
329                                              static_cast<unsigned char*>(dst_buff_),
330                                              &dst_buff_size_,
331                                              match_fun_,
332                                              copy_data_function_,
333                                              get_data<void>(),
334                                              rate_};
335     pimpl_ = kernel::actor::simcall_answered([&observer] { return kernel::activity::CommImpl::irecv(&observer); },
336                                              &observer);
337   } else {
338     xbt_die("Cannot start a communication before specifying whether we are the sender or the receiver");
339   }
340
341   if (suspended_)
342     pimpl_->suspend();
343
344   if (not detached_) {
345     pimpl_->set_iface(this);
346     pimpl_->set_actor(sender_);
347   }
348
349   state_ = State::STARTED;
350   return this;
351 }
352
353 Comm* Comm::detach()
354 {
355   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
356              "You cannot use %s() once your communication is %s (not implemented)", __FUNCTION__, get_state_str());
357   xbt_assert(dst_buff_ == nullptr && dst_buff_size_ == 0, "You can only detach sends, not recvs");
358   detached_ = true;
359   vetoable_start();
360   return this;
361 }
362
363 ssize_t Comm::test_any(const std::vector<CommPtr>& comms)
364 {
365   std::vector<ActivityPtr> activities;
366   for (const auto& comm : comms)
367     activities.push_back(boost::dynamic_pointer_cast<Activity>(comm));
368   return Activity::test_any(activities);
369 }
370
371 /** @brief Block the calling actor until the communication is finished, or until timeout
372  *
373  * On timeout, an exception is thrown and the communication is invalidated.
374  *
375  * @param timeout the amount of seconds to wait for the comm termination.
376  *                Negative values denote infinite wait times. 0 as a timeout returns immediately. */
377 Comm* Comm::wait_for(double timeout)
378 {
379   XBT_DEBUG("Calling Comm::wait_for with state %s", get_state_str());
380   kernel::actor::ActorImpl* issuer = nullptr;
381   switch (state_) {
382     case State::FINISHED:
383       break;
384     case State::FAILED:
385       throw NetworkFailureException(XBT_THROW_POINT, "Cannot wait for a failed communication");
386     case State::INITED:
387     case State::STARTING: // It's not started yet. Do it in one simcall if it's a regular communication
388       if (get_source() != nullptr || get_destination() != nullptr) {
389         return vetoable_start()->wait_for(timeout); // In the case of host2host comm, do it in two simcalls
390       } else if (src_buff_ != nullptr) {
391         on_send(*this);
392         send(sender_, mailbox_, remains_, rate_, src_buff_, src_buff_size_, match_fun_, copy_data_function_,
393              get_data<void>(), timeout);
394
395       } else { // Receiver
396         on_recv(*this);
397         recv(receiver_, mailbox_, dst_buff_, &dst_buff_size_, match_fun_, copy_data_function_, get_data<void>(),
398              timeout, rate_);
399       }
400       break;
401     case State::STARTED:
402       try {
403         issuer = kernel::actor::ActorImpl::self();
404         kernel::actor::ActivityWaitSimcall observer{issuer, pimpl_.get(), timeout};
405         if (kernel::actor::simcall_blocking(
406                 [&observer] { observer.get_activity()->wait_for(observer.get_issuer(), observer.get_timeout()); },
407                 &observer)) {
408           throw TimeoutException(XBT_THROW_POINT, "Timeouted");
409         }
410       } catch (const NetworkFailureException& e) {
411         issuer->simcall_.observer_ = nullptr; // Comm failed on network failure, reset the observer to nullptr
412         complete(State::FAILED);
413         e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
414       }
415       break;
416
417     case State::CANCELED:
418       throw CancelException(XBT_THROW_POINT, "Communication canceled");
419
420     default:
421       THROW_IMPOSSIBLE;
422   }
423   complete(State::FINISHED);
424   return this;
425 }
426
427 ssize_t Comm::wait_any_for(const std::vector<CommPtr>& comms, double timeout)
428 {
429   std::vector<ActivityPtr> activities;
430   for (const auto& comm : comms)
431     activities.push_back(boost::dynamic_pointer_cast<Activity>(comm));
432   ssize_t changed_pos;
433   try {
434     changed_pos = Activity::wait_any_for(activities, timeout);
435   } catch (const NetworkFailureException& e) {
436     changed_pos = -1;
437     for (auto c : comms) {
438       if (c->pimpl_->get_state() == kernel::activity::State::FAILED) {
439         c->complete(State::FAILED);
440       }
441     }
442     e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
443   }
444   return changed_pos;
445 }
446
447 void Comm::wait_all(const std::vector<CommPtr>& comms)
448 {
449   // TODO: this should be a simcall or something
450   for (auto& comm : comms)
451     comm->wait();
452 }
453
454 size_t Comm::wait_all_for(const std::vector<CommPtr>& comms, double timeout)
455 {
456   if (timeout < 0.0) {
457     wait_all(comms);
458     return comms.size();
459   }
460
461   double deadline = Engine::get_clock() + timeout;
462   std::vector<CommPtr> waited_comm(1, nullptr);
463   for (size_t i = 0; i < comms.size(); i++) {
464     double wait_timeout = std::max(0.0, deadline - Engine::get_clock());
465     waited_comm[0]      = comms[i];
466     // Using wait_any_for() here (and not wait_for) because we don't want comms to be invalidated on timeout
467     if (wait_any_for(waited_comm, wait_timeout) == -1) {
468       XBT_DEBUG("Timeout (%g): i = %zu", wait_timeout, i);
469       return i;
470     }
471   }
472   return comms.size();
473 }
474 } // namespace simgrid::s4u
475 /* **************************** Public C interface *************************** */
476 void sg_comm_detach(sg_comm_t comm, void (*clean_function)(void*))
477 {
478   comm->detach(clean_function);
479   comm->unref();
480 }
481 void sg_comm_unref(sg_comm_t comm)
482 {
483   comm->unref();
484 }
485 int sg_comm_test(sg_comm_t comm)
486 {
487   bool finished = comm->test();
488   if (finished)
489     comm->unref();
490   return finished;
491 }
492
493 sg_error_t sg_comm_wait(sg_comm_t comm)
494 {
495   return sg_comm_wait_for(comm, -1);
496 }
497
498 sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout)
499 {
500   sg_error_t status = SG_OK;
501
502   simgrid::s4u::CommPtr s4u_comm(comm, false);
503   try {
504     s4u_comm->wait_for(timeout);
505   } catch (const simgrid::TimeoutException&) {
506     status = SG_ERROR_TIMEOUT;
507   } catch (const simgrid::CancelException&) {
508     status = SG_ERROR_CANCELED;
509   } catch (const simgrid::NetworkFailureException&) {
510     status = SG_ERROR_NETWORK;
511   }
512   return status;
513 }
514
515 void sg_comm_wait_all(sg_comm_t* comms, size_t count)
516 {
517   sg_comm_wait_all_for(comms, count, -1);
518 }
519
520 size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout)
521 {
522   std::vector<simgrid::s4u::CommPtr> s4u_comms;
523   for (size_t i = 0; i < count; i++)
524     s4u_comms.emplace_back(comms[i], false);
525
526   size_t pos = simgrid::s4u::Comm::wait_all_for(s4u_comms, timeout);
527   for (size_t i = pos; i < count; i++)
528     s4u_comms[i]->add_ref();
529   return pos;
530 }
531
532 ssize_t sg_comm_wait_any(sg_comm_t* comms, size_t count)
533 {
534   return sg_comm_wait_any_for(comms, count, -1);
535 }
536
537 ssize_t sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout)
538 {
539   std::vector<simgrid::s4u::CommPtr> s4u_comms;
540   for (size_t i = 0; i < count; i++)
541     s4u_comms.emplace_back(comms[i], false);
542
543   ssize_t pos = simgrid::s4u::Comm::wait_any_for(s4u_comms, timeout);
544   for (size_t i = 0; i < count; i++) {
545     if (pos != -1 && static_cast<size_t>(pos) != i)
546       s4u_comms[i]->add_ref();
547   }
548   return pos;
549 }