Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Use std::function for Comm callbacks.
[simgrid.git] / src / s4u / s4u_Comm.cpp
1 /* Copyright (c) 2006-2022. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include <cmath>
7 #include <simgrid/Exception.hpp>
8 #include <simgrid/comm.h>
9 #include <simgrid/s4u/Comm.hpp>
10 #include <simgrid/s4u/Engine.hpp>
11 #include <simgrid/s4u/Mailbox.hpp>
12
13 #include "mc/mc.h"
14 #include "src/kernel/activity/CommImpl.hpp"
15 #include "src/kernel/actor/ActorImpl.hpp"
16 #include "src/kernel/actor/SimcallObserver.hpp"
17 #include "src/mc/mc_replay.hpp"
18
/* Logging category used by the XBT_DEBUG / XBT_INFO calls of this file: "s4u_comm", declared under "s4u_activity" */
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(s4u_comm, s4u_activity, "S4U asynchronous communications");
20
21 namespace simgrid {
22 namespace s4u {
23 xbt::signal<void(Comm const&)> Comm::on_send;
24 xbt::signal<void(Comm const&)> Comm::on_recv;
25 xbt::signal<void(Comm const&)> Comm::on_completion;
26
27 CommPtr Comm::set_copy_data_callback(const std::function<void(kernel::activity::CommImpl*, void*, size_t)>& callback)
28 {
29   copy_data_function_ = callback;
30   return this;
31 }
32
33 void Comm::copy_buffer_callback(kernel::activity::CommImpl* comm, void* buff, size_t buff_size)
34 {
35   XBT_DEBUG("Copy the data over");
36   memcpy(comm->dst_buff_, buff, buff_size);
37   if (comm->is_detached()) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the
38                              // original buffer available to the application ASAP
39     xbt_free(buff);
40     comm->src_buff_ = nullptr;
41   }
42 }
43
44 void Comm::copy_pointer_callback(kernel::activity::CommImpl* comm, void* buff, size_t buff_size)
45 {
46   xbt_assert((buff_size == sizeof(void*)), "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
47   *(void**)(comm->dst_buff_) = buff;
48 }
49
50 Comm::~Comm()
51 {
52   if (state_ == State::STARTED && not detached_ &&
53       (pimpl_ == nullptr || pimpl_->get_state() == kernel::activity::State::RUNNING)) {
54     XBT_INFO("Comm %p freed before its completion. Did you forget to detach it? (state: %s)", this, get_state_str());
55     if (pimpl_ != nullptr)
56       XBT_INFO("pimpl_->state: %s", pimpl_->get_state_str());
57     else
58       XBT_INFO("pimpl_ is null");
59     xbt_backtrace_display_current();
60   }
61 }
62
63 void Comm::send(kernel::actor::ActorImpl* sender, const Mailbox* mbox, double task_size, double rate, void* src_buff,
64                 size_t src_buff_size,
65                 const std::function<bool(void*, void*, simgrid::kernel::activity::CommImpl*)>& match_fun,
66                 const std::function<void(simgrid::kernel::activity::CommImpl*, void*, size_t)>& copy_data_fun,
67                 void* data, double timeout)
68 {
69   /* checking for infinite values */
70   xbt_assert(std::isfinite(task_size), "task_size is not finite!");
71   xbt_assert(std::isfinite(rate), "rate is not finite!");
72   xbt_assert(std::isfinite(timeout), "timeout is not finite!");
73
74   xbt_assert(mbox, "No rendez-vous point defined for send");
75
76   if (MC_is_active() || MC_record_replay_is_active()) {
77     /* the model-checker wants two separate simcalls, and wants comm to be nullptr during the simcall */
78     simgrid::kernel::activity::ActivityImplPtr comm = nullptr;
79
80     simgrid::kernel::actor::CommIsendSimcall send_observer{
81         sender,  mbox->get_impl(), task_size, rate, static_cast<unsigned char*>(src_buff), src_buff_size, match_fun,
82         nullptr, copy_data_fun,    data,      false};
83     comm = simgrid::kernel::actor::simcall_answered(
84         [&send_observer] { return simgrid::kernel::activity::CommImpl::isend(&send_observer); }, &send_observer);
85
86     simgrid::kernel::actor::ActivityWaitSimcall wait_observer{sender, comm.get(), timeout};
87     if (simgrid::kernel::actor::simcall_blocking(
88             [&wait_observer] {
89               wait_observer.get_activity()->wait_for(wait_observer.get_issuer(), wait_observer.get_timeout());
90             },
91             &wait_observer)) {
92       throw simgrid::TimeoutException(XBT_THROW_POINT, "Timeouted");
93     }
94     comm = nullptr;
95   } else {
96     simgrid::kernel::actor::CommIsendSimcall observer(sender, mbox->get_impl(), task_size, rate,
97                                                       static_cast<unsigned char*>(src_buff), src_buff_size, match_fun,
98                                                       nullptr, copy_data_fun, data, false);
99     simgrid::kernel::actor::simcall_blocking([&observer, timeout] {
100       simgrid::kernel::activity::ActivityImplPtr comm = simgrid::kernel::activity::CommImpl::isend(&observer);
101       comm->wait_for(observer.get_issuer(), timeout);
102     });
103   }
104 }
105
106 void Comm::recv(kernel::actor::ActorImpl* receiver, const Mailbox* mbox, void* dst_buff, size_t* dst_buff_size,
107                 const std::function<bool(void*, void*, simgrid::kernel::activity::CommImpl*)>& match_fun,
108                 const std::function<void(simgrid::kernel::activity::CommImpl*, void*, size_t)>& copy_data_fun,
109                 void* data, double timeout, double rate)
110 {
111   xbt_assert(std::isfinite(timeout), "timeout is not finite!");
112   xbt_assert(mbox, "No rendez-vous point defined for recv");
113
114   if (MC_is_active() || MC_record_replay_is_active()) {
115     /* the model-checker wants two separate simcalls, and wants comm to be nullptr during the simcall */
116     simgrid::kernel::activity::ActivityImplPtr comm = nullptr;
117
118     simgrid::kernel::actor::CommIrecvSimcall observer{receiver,
119                                                       mbox->get_impl(),
120                                                       static_cast<unsigned char*>(dst_buff),
121                                                       dst_buff_size,
122                                                       match_fun,
123                                                       copy_data_fun,
124                                                       data,
125                                                       rate};
126     comm = simgrid::kernel::actor::simcall_answered(
127         [&observer] { return simgrid::kernel::activity::CommImpl::irecv(&observer); }, &observer);
128
129     simgrid::kernel::actor::ActivityWaitSimcall wait_observer{receiver, comm.get(), timeout};
130     if (simgrid::kernel::actor::simcall_blocking(
131             [&wait_observer] {
132               wait_observer.get_activity()->wait_for(wait_observer.get_issuer(), wait_observer.get_timeout());
133             },
134             &wait_observer)) {
135       throw simgrid::TimeoutException(XBT_THROW_POINT, "Timeouted");
136     }
137     comm = nullptr;
138   } else {
139     simgrid::kernel::actor::CommIrecvSimcall observer(receiver, mbox->get_impl(), static_cast<unsigned char*>(dst_buff),
140                                                       dst_buff_size, match_fun, copy_data_fun, data, rate);
141     simgrid::kernel::actor::simcall_blocking([&observer, timeout] {
142       simgrid::kernel::activity::ActivityImplPtr comm = simgrid::kernel::activity::CommImpl::irecv(&observer);
143       comm->wait_for(observer.get_issuer(), timeout);
144     });
145   }
146 }
147
148 CommPtr Comm::sendto_init()
149 {
150   CommPtr res(new Comm());
151   res->pimpl_ = kernel::activity::CommImplPtr(new kernel::activity::CommImpl());
152   boost::static_pointer_cast<kernel::activity::CommImpl>(res->pimpl_)->detach();
153   res->sender_ = kernel::actor::ActorImpl::self();
154   return res;
155 }
156
157 CommPtr Comm::sendto_init(Host* from, Host* to)
158 {
159   auto res = Comm::sendto_init()->set_source(from)->set_destination(to);
160   res->set_state(State::STARTING);
161   return res;
162 }
163
164 CommPtr Comm::sendto_async(Host* from, Host* to, uint64_t simulated_size_in_bytes)
165 {
166   return Comm::sendto_init()->set_payload_size(simulated_size_in_bytes)->set_source(from)->set_destination(to);
167 }
168
169 void Comm::sendto(Host* from, Host* to, uint64_t simulated_size_in_bytes)
170 {
171   sendto_async(from, to, simulated_size_in_bytes)->wait();
172 }
173
174 CommPtr Comm::set_source(Host* from)
175 {
176   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
177              "Cannot change the source of a Comm once it's started (state: %s)", to_c_str(state_));
178   boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_source(from);
179   // Setting 'source' may allow to start the activity, let's try
180   if (state_ == State::STARTING && remains_ <= 0)
181     XBT_DEBUG("This communication has a payload size of 0 byte. It cannot start yet");
182   else
183     vetoable_start();
184
185   return this;
186 }
187 Host* Comm::get_source() const
188 {
189   return pimpl_ ? boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->get_source() : nullptr;
190 }
191
192 CommPtr Comm::set_destination(Host* to)
193 {
194   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
195              "Cannot change the destination of a Comm once it's started (state: %s)", to_c_str(state_));
196   boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_destination(to);
197   // Setting 'destination' may allow to start the activity, let's try
198   if (state_ == State::STARTING && remains_ <= 0)
199     XBT_DEBUG("This communication has a payload size of 0 byte. It cannot start yet");
200   else
201     vetoable_start();
202
203   return this;
204 }
205
206 Host* Comm::get_destination() const
207 {
208   return pimpl_ ? boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->get_destination() : nullptr;
209 }
210
211 CommPtr Comm::set_rate(double rate)
212 {
213   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
214              __FUNCTION__);
215   rate_ = rate;
216   return this;
217 }
218
219 CommPtr Comm::set_mailbox(Mailbox* mailbox)
220 {
221   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
222              __FUNCTION__);
223   mailbox_ = mailbox;
224   return this;
225 }
226
227 CommPtr Comm::set_src_data(void* buff)
228 {
229   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
230              __FUNCTION__);
231   xbt_assert(dst_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
232   src_buff_ = buff;
233   return this;
234 }
235
236 CommPtr Comm::set_src_data_size(size_t size)
237 {
238   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
239              __FUNCTION__);
240   src_buff_size_ = size;
241   return this;
242 }
243
244 CommPtr Comm::set_src_data(void* buff, size_t size)
245 {
246   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
247              __FUNCTION__);
248
249   xbt_assert(dst_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
250   src_buff_      = buff;
251   src_buff_size_ = size;
252   return this;
253 }
254
255 CommPtr Comm::set_dst_data(void** buff)
256 {
257   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
258              __FUNCTION__);
259   xbt_assert(src_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
260   dst_buff_ = buff;
261   return this;
262 }
263
264 CommPtr Comm::set_dst_data(void** buff, size_t size)
265 {
266   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
267              __FUNCTION__);
268
269   xbt_assert(src_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
270   dst_buff_      = buff;
271   dst_buff_size_ = size;
272   return this;
273 }
274
275 CommPtr Comm::set_payload_size(uint64_t bytes)
276 {
277   Activity::set_remaining(bytes);
278   if (pimpl_) {
279     boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_size(bytes);
280   }
281   return this;
282 }
283
284 Actor* Comm::get_sender() const
285 {
286   kernel::actor::ActorImplPtr sender = nullptr;
287   if (pimpl_)
288     sender = boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->src_actor_;
289   return sender ? sender->get_ciface() : nullptr;
290 }
291
292 bool Comm::is_assigned() const
293 {
294   return (pimpl_ && boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->is_assigned()) ||
295          mailbox_ != nullptr;
296 }
297
298 Comm* Comm::start()
299 {
300   xbt_assert(get_state() == State::INITED || get_state() == State::STARTING,
301              "You cannot use %s() once your communication started (not implemented)", __FUNCTION__);
302   if (get_source() != nullptr || get_destination() != nullptr) {
303     xbt_assert(is_assigned(), "When either from_ or to_ is specified, both must be.");
304     xbt_assert(src_buff_ == nullptr && dst_buff_ == nullptr,
305                "Direct host-to-host communications cannot carry any data.");
306     XBT_DEBUG("host-to-host Comm. Pimpl already created and set, just start it.");
307     kernel::actor::simcall_answered([this] {
308       pimpl_->set_state(kernel::activity::State::READY);
309       boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->start();
310     });
311   } else if (src_buff_ != nullptr) { // Sender side
312     on_send(*this);
313     kernel::actor::CommIsendSimcall observer{sender_,
314                                              mailbox_->get_impl(),
315                                              remains_,
316                                              rate_,
317                                              static_cast<unsigned char*>(src_buff_),
318                                              src_buff_size_,
319                                              match_fun_,
320                                              clean_fun_,
321                                              copy_data_function_,
322                                              get_data<void>(),
323                                              detached_};
324     pimpl_ = kernel::actor::simcall_answered([&observer] { return kernel::activity::CommImpl::isend(&observer); },
325                                              &observer);
326   } else if (dst_buff_ != nullptr) { // Receiver side
327     xbt_assert(not detached_, "Receive cannot be detached");
328     on_recv(*this);
329     kernel::actor::CommIrecvSimcall observer{receiver_,
330                                              mailbox_->get_impl(),
331                                              static_cast<unsigned char*>(dst_buff_),
332                                              &dst_buff_size_,
333                                              match_fun_,
334                                              copy_data_function_,
335                                              get_data<void>(),
336                                              rate_};
337     pimpl_ = kernel::actor::simcall_answered([&observer] { return kernel::activity::CommImpl::irecv(&observer); },
338                                              &observer);
339   } else {
340     xbt_die("Cannot start a communication before specifying whether we are the sender or the receiver");
341   }
342
343   if (suspended_)
344     pimpl_->suspend();
345
346   if (not detached_) {
347     pimpl_->set_iface(this);
348     pimpl_->set_actor(sender_);
349   }
350
351   state_ = State::STARTED;
352   return this;
353 }
354
355 Comm* Comm::detach()
356 {
357   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
358              "You cannot use %s() once your communication is %s (not implemented)", __FUNCTION__, get_state_str());
359   xbt_assert(dst_buff_ == nullptr && dst_buff_size_ == 0, "You can only detach sends, not recvs");
360   detached_ = true;
361   vetoable_start();
362   return this;
363 }
364
365 ssize_t Comm::test_any(const std::vector<CommPtr>& comms)
366 {
367   std::vector<ActivityPtr> activities;
368   for (const auto& comm : comms)
369     activities.push_back(boost::dynamic_pointer_cast<Activity>(comm));
370   return Activity::test_any(activities);
371 }
372
373 /** @brief Block the calling actor until the communication is finished, or until timeout
374  *
375  * On timeout, an exception is thrown and the communication is invalidated.
376  *
377  * @param timeout the amount of seconds to wait for the comm termination.
378  *                Negative values denote infinite wait times. 0 as a timeout returns immediately. */
379 Comm* Comm::wait_for(double timeout)
380 {
381   XBT_DEBUG("Calling Comm::wait_for with state %s", get_state_str());
382   kernel::actor::ActorImpl* issuer = nullptr;
383   switch (state_) {
384     case State::FINISHED:
385       break;
386     case State::FAILED:
387       throw NetworkFailureException(XBT_THROW_POINT, "Cannot wait for a failed communication");
388     case State::INITED:
389     case State::STARTING: // It's not started yet. Do it in one simcall if it's a regular communication
390       if (get_source() != nullptr || get_destination() != nullptr) {
391         return vetoable_start()->wait_for(timeout); // In the case of host2host comm, do it in two simcalls
392       } else if (src_buff_ != nullptr) {
393         on_send(*this);
394         send(sender_, mailbox_, remains_, rate_, src_buff_, src_buff_size_, match_fun_, copy_data_function_,
395              get_data<void>(), timeout);
396
397       } else { // Receiver
398         on_recv(*this);
399         recv(receiver_, mailbox_, dst_buff_, &dst_buff_size_, match_fun_, copy_data_function_, get_data<void>(),
400              timeout, rate_);
401       }
402       break;
403     case State::STARTED:
404       try {
405         issuer = kernel::actor::ActorImpl::self();
406         kernel::actor::ActivityWaitSimcall observer{issuer, pimpl_.get(), timeout};
407         if (kernel::actor::simcall_blocking(
408                 [&observer] { observer.get_activity()->wait_for(observer.get_issuer(), observer.get_timeout()); },
409                 &observer)) {
410           throw TimeoutException(XBT_THROW_POINT, "Timeouted");
411         }
412       } catch (const NetworkFailureException& e) {
413         issuer->simcall_.observer_ = nullptr; // Comm failed on network failure, reset the observer to nullptr
414         complete(State::FAILED);
415         e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
416       }
417       break;
418
419     case State::CANCELED:
420       throw CancelException(XBT_THROW_POINT, "Communication canceled");
421
422     default:
423       THROW_IMPOSSIBLE;
424   }
425   complete(State::FINISHED);
426   return this;
427 }
428
429 ssize_t Comm::wait_any_for(const std::vector<CommPtr>& comms, double timeout)
430 {
431   std::vector<ActivityPtr> activities;
432   for (const auto& comm : comms)
433     activities.push_back(boost::dynamic_pointer_cast<Activity>(comm));
434   ssize_t changed_pos;
435   try {
436     changed_pos = Activity::wait_any_for(activities, timeout);
437   } catch (const NetworkFailureException& e) {
438     changed_pos = -1;
439     for (auto c : comms) {
440       if (c->pimpl_->get_state() == kernel::activity::State::FAILED) {
441         c->complete(State::FAILED);
442       }
443     }
444     e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
445   }
446   return changed_pos;
447 }
448
449 void Comm::wait_all(const std::vector<CommPtr>& comms)
450 {
451   // TODO: this should be a simcall or something
452   for (auto& comm : comms)
453     comm->wait();
454 }
455
456 size_t Comm::wait_all_for(const std::vector<CommPtr>& comms, double timeout)
457 {
458   if (timeout < 0.0) {
459     wait_all(comms);
460     return comms.size();
461   }
462
463   double deadline = Engine::get_clock() + timeout;
464   std::vector<CommPtr> waited_comm(1, nullptr);
465   for (size_t i = 0; i < comms.size(); i++) {
466     double wait_timeout = std::max(0.0, deadline - Engine::get_clock());
467     waited_comm[0]      = comms[i];
468     // Using wait_any_for() here (and not wait_for) because we don't want comms to be invalidated on timeout
469     if (wait_any_for(waited_comm, wait_timeout) == -1) {
470       XBT_DEBUG("Timeout (%g): i = %zu", wait_timeout, i);
471       return i;
472     }
473   }
474   return comms.size();
475 }
476 } // namespace s4u
477 } // namespace simgrid
478 /* **************************** Public C interface *************************** */
/** @brief C interface: detach the communication with the given clean function, then drop one reference to it.
 *
 * @param comm           communication to detach
 * @param clean_function function given to Comm::detach()
 */
void sg_comm_detach(sg_comm_t comm, void (*clean_function)(void*))
{
  comm->detach(clean_function);
  comm->unref(); // the handle of the caller must not be used afterward
}
/** @brief C interface: drop one reference to the communication by calling its unref() method. */
void sg_comm_unref(sg_comm_t comm)
{
  comm->unref();
}
488 int sg_comm_test(sg_comm_t comm)
489 {
490   bool finished = comm->test();
491   if (finished)
492     comm->unref();
493   return finished;
494 }
495
496 sg_error_t sg_comm_wait(sg_comm_t comm)
497 {
498   return sg_comm_wait_for(comm, -1);
499 }
500
501 sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout)
502 {
503   sg_error_t status = SG_OK;
504
505   simgrid::s4u::CommPtr s4u_comm(comm, false);
506   try {
507     s4u_comm->wait_for(timeout);
508   } catch (const simgrid::TimeoutException&) {
509     status = SG_ERROR_TIMEOUT;
510   } catch (const simgrid::CancelException&) {
511     status = SG_ERROR_CANCELED;
512   } catch (const simgrid::NetworkFailureException&) {
513     status = SG_ERROR_NETWORK;
514   }
515   return status;
516 }
517
518 void sg_comm_wait_all(sg_comm_t* comms, size_t count)
519 {
520   sg_comm_wait_all_for(comms, count, -1);
521 }
522
523 size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout)
524 {
525   std::vector<simgrid::s4u::CommPtr> s4u_comms;
526   for (size_t i = 0; i < count; i++)
527     s4u_comms.emplace_back(comms[i], false);
528
529   size_t pos = simgrid::s4u::Comm::wait_all_for(s4u_comms, timeout);
530   for (size_t i = pos; i < count; i++)
531     s4u_comms[i]->add_ref();
532   return pos;
533 }
534
535 ssize_t sg_comm_wait_any(sg_comm_t* comms, size_t count)
536 {
537   return sg_comm_wait_any_for(comms, count, -1);
538 }
539
540 ssize_t sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout)
541 {
542   std::vector<simgrid::s4u::CommPtr> s4u_comms;
543   for (size_t i = 0; i < count; i++)
544     s4u_comms.emplace_back(comms[i], false);
545
546   ssize_t pos = simgrid::s4u::Comm::wait_any_for(s4u_comms, timeout);
547   for (size_t i = 0; i < count; i++) {
548     if (pos != -1 && static_cast<size_t>(pos) != i)
549       s4u_comms[i]->add_ref();
550   }
551   return pos;
552 }