Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
fire on_start for one-sided communication instead of on_send/recv
[simgrid.git] / src / s4u / s4u_Comm.cpp
1 /* Copyright (c) 2006-2023. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include <cmath>
7 #include <simgrid/Exception.hpp>
8 #include <simgrid/comm.h>
9 #include <simgrid/s4u/Comm.hpp>
10 #include <simgrid/s4u/Engine.hpp>
11 #include <simgrid/s4u/Mailbox.hpp>
12
13 #include "src/kernel/activity/CommImpl.hpp"
14 #include "src/kernel/actor/ActorImpl.hpp"
15 #include "src/kernel/actor/SimcallObserver.hpp"
16 #include "src/mc/mc.h"
17 #include "src/mc/mc_replay.hpp"
18
19 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(s4u_comm, s4u_activity, "S4U asynchronous communications");
20
21 namespace simgrid::s4u {
22 xbt::signal<void(Comm const&)> Comm::on_send;
23 xbt::signal<void(Comm const&)> Comm::on_recv;
24
25 CommPtr Comm::set_copy_data_callback(const std::function<void(kernel::activity::CommImpl*, void*, size_t)>& callback)
26 {
27   copy_data_function_ = callback;
28   return this;
29 }
30
31 void Comm::copy_buffer_callback(kernel::activity::CommImpl* comm, void* buff,
32                                 size_t buff_size) // XBT_ATTRIB_DEPRECATED_v337
33 {
34   XBT_DEBUG("Copy the data over");
35   memcpy(comm->dst_buff_, buff, buff_size);
36   if (comm->is_detached()) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the
37                              // original buffer available to the application ASAP
38     xbt_free(buff);
39     comm->src_buff_ = nullptr;
40   }
41 }
42
43 void Comm::copy_pointer_callback(kernel::activity::CommImpl* comm, void* buff,
44                                  size_t buff_size) // XBT_ATTRIB_DEPRECATED_v337
45 {
46   xbt_assert((buff_size == sizeof(void*)), "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
47   *(void**)(comm->dst_buff_) = buff;
48 }
49
50 Comm::~Comm()
51 {
52   if (state_ == State::STARTED && not detached_ &&
53       (pimpl_ == nullptr || pimpl_->get_state() == kernel::activity::State::RUNNING)) {
54     XBT_INFO("Comm %p freed before its completion. Did you forget to detach it? (state: %s)", this, get_state_str());
55     if (pimpl_ != nullptr)
56       XBT_INFO("pimpl_->state: %s", pimpl_->get_state_str());
57     else
58       XBT_INFO("pimpl_ is null");
59     xbt_backtrace_display_current();
60   }
61   if (pimpl_ != nullptr)
62     pimpl_->set_iface(nullptr);
63 }
64
65 void Comm::send(kernel::actor::ActorImpl* sender, const Mailbox* mbox, double task_size, double rate, void* src_buff,
66                 size_t src_buff_size,
67                 const std::function<bool(void*, void*, simgrid::kernel::activity::CommImpl*)>& match_fun,
68                 const std::function<void(simgrid::kernel::activity::CommImpl*, void*, size_t)>& copy_data_fun,
69                 void* data, double timeout)
70 {
71   /* checking for infinite values */
72   xbt_assert(std::isfinite(task_size), "task_size is not finite!");
73   xbt_assert(std::isfinite(rate), "rate is not finite!");
74   xbt_assert(std::isfinite(timeout), "timeout is not finite!");
75
76   xbt_assert(mbox, "No rendez-vous point defined for send");
77
78   if (MC_is_active() || MC_record_replay_is_active()) {
79     /* the model-checker wants two separate simcalls, and wants comm to be nullptr during the simcall */
80     simgrid::kernel::activity::ActivityImplPtr comm = nullptr;
81
82     simgrid::kernel::actor::CommIsendSimcall send_observer{
83         sender,  mbox->get_impl(), task_size, rate, static_cast<unsigned char*>(src_buff), src_buff_size, match_fun,
84         nullptr, copy_data_fun,    data,      false};
85     comm = simgrid::kernel::actor::simcall_answered(
86         [&send_observer] { return simgrid::kernel::activity::CommImpl::isend(&send_observer); }, &send_observer);
87
88     if (simgrid::kernel::actor::ActivityWaitSimcall wait_observer{sender, comm.get(), timeout};
89         simgrid::kernel::actor::simcall_blocking(
90             [&wait_observer] {
91               wait_observer.get_activity()->wait_for(wait_observer.get_issuer(), wait_observer.get_timeout());
92             },
93             &wait_observer)) {
94       throw simgrid::TimeoutException(XBT_THROW_POINT, "Timeouted");
95     }
96     comm = nullptr;
97   } else {
98     simgrid::kernel::actor::CommIsendSimcall observer(sender, mbox->get_impl(), task_size, rate,
99                                                       static_cast<unsigned char*>(src_buff), src_buff_size, match_fun,
100                                                       nullptr, copy_data_fun, data, false);
101     simgrid::kernel::actor::simcall_blocking([&observer, timeout] {
102       simgrid::kernel::activity::ActivityImplPtr comm = simgrid::kernel::activity::CommImpl::isend(&observer);
103       comm->wait_for(observer.get_issuer(), timeout);
104     });
105   }
106 }
107
108 void Comm::recv(kernel::actor::ActorImpl* receiver, const Mailbox* mbox, void* dst_buff, size_t* dst_buff_size,
109                 const std::function<bool(void*, void*, simgrid::kernel::activity::CommImpl*)>& match_fun,
110                 const std::function<void(simgrid::kernel::activity::CommImpl*, void*, size_t)>& copy_data_fun,
111                 void* data, double timeout, double rate)
112 {
113   xbt_assert(std::isfinite(timeout), "timeout is not finite!");
114   xbt_assert(mbox, "No rendez-vous point defined for recv");
115
116   if (MC_is_active() || MC_record_replay_is_active()) {
117     /* the model-checker wants two separate simcalls, and wants comm to be nullptr during the simcall */
118     simgrid::kernel::activity::ActivityImplPtr comm = nullptr;
119
120     simgrid::kernel::actor::CommIrecvSimcall observer{receiver,
121                                                       mbox->get_impl(),
122                                                       static_cast<unsigned char*>(dst_buff),
123                                                       dst_buff_size,
124                                                       match_fun,
125                                                       copy_data_fun,
126                                                       data,
127                                                       rate};
128     comm = simgrid::kernel::actor::simcall_answered(
129         [&observer] { return simgrid::kernel::activity::CommImpl::irecv(&observer); }, &observer);
130
131     if (simgrid::kernel::actor::ActivityWaitSimcall wait_observer{receiver, comm.get(), timeout};
132         simgrid::kernel::actor::simcall_blocking(
133             [&wait_observer] {
134               wait_observer.get_activity()->wait_for(wait_observer.get_issuer(), wait_observer.get_timeout());
135             },
136             &wait_observer)) {
137       throw simgrid::TimeoutException(XBT_THROW_POINT, "Timeouted");
138     }
139     comm = nullptr;
140   } else {
141     simgrid::kernel::actor::CommIrecvSimcall observer(receiver, mbox->get_impl(), static_cast<unsigned char*>(dst_buff),
142                                                       dst_buff_size, match_fun, copy_data_fun, data, rate);
143     simgrid::kernel::actor::simcall_blocking([&observer, timeout] {
144       simgrid::kernel::activity::ActivityImplPtr comm = simgrid::kernel::activity::CommImpl::irecv(&observer);
145       comm->wait_for(observer.get_issuer(), timeout);
146     });
147   }
148 }
149
150 CommPtr Comm::sendto_init()
151 {
152   CommPtr res(new Comm());
153   res->pimpl_ = kernel::activity::CommImplPtr(new kernel::activity::CommImpl());
154   boost::static_pointer_cast<kernel::activity::CommImpl>(res->pimpl_)->detach();
155   res->sender_ = kernel::actor::ActorImpl::self();
156   return res;
157 }
158
159 CommPtr Comm::sendto_init(Host* from, Host* to)
160 {
161   auto res = Comm::sendto_init()->set_source(from)->set_destination(to);
162   res->set_state(State::STARTING);
163   return res;
164 }
165
166 CommPtr Comm::sendto_async(Host* from, Host* to, uint64_t simulated_size_in_bytes)
167 {
168   return Comm::sendto_init()->set_payload_size(simulated_size_in_bytes)->set_source(from)->set_destination(to);
169 }
170
171 void Comm::sendto(Host* from, Host* to, uint64_t simulated_size_in_bytes)
172 {
173   sendto_async(from, to, simulated_size_in_bytes)->wait();
174 }
175
176 CommPtr Comm::set_source(Host* from)
177 {
178   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
179              "Cannot change the source of a Comm once it's started (state: %s)", to_c_str(state_));
180   boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_source(from);
181   // Setting 'source' may allow to start the activity, let's try
182   if (state_ == State::STARTING && remains_ <= 0)
183     XBT_DEBUG("This communication has a payload size of 0 byte. It cannot start yet");
184   else
185     start();
186
187   return this;
188 }
189 Host* Comm::get_source() const
190 {
191   return pimpl_ ? boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->get_source() : nullptr;
192 }
193
194 CommPtr Comm::set_destination(Host* to)
195 {
196   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
197              "Cannot change the destination of a Comm once it's started (state: %s)", to_c_str(state_));
198   boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_destination(to);
199   // Setting 'destination' may allow to start the activity, let's try
200   if (state_ == State::STARTING && remains_ <= 0)
201     XBT_DEBUG("This communication has a payload size of 0 byte. It cannot start yet");
202   else
203     start();
204
205   return this;
206 }
207
208 Host* Comm::get_destination() const
209 {
210   return pimpl_ ? boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->get_destination() : nullptr;
211 }
212
213 CommPtr Comm::set_rate(double rate)
214 {
215   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
216              __FUNCTION__);
217   rate_ = rate;
218   return this;
219 }
220
221 CommPtr Comm::set_mailbox(Mailbox* mailbox)
222 {
223   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
224              __FUNCTION__);
225   mailbox_ = mailbox;
226   return this;
227 }
228
229 CommPtr Comm::set_src_data(void* buff)
230 {
231   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
232              __FUNCTION__);
233   xbt_assert(dst_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
234   src_buff_ = buff;
235   return this;
236 }
237
238 CommPtr Comm::set_src_data_size(size_t size)
239 {
240   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
241              __FUNCTION__);
242   src_buff_size_ = size;
243   return this;
244 }
245
246 CommPtr Comm::set_src_data(void* buff, size_t size)
247 {
248   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
249              __FUNCTION__);
250
251   xbt_assert(dst_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
252   src_buff_      = buff;
253   src_buff_size_ = size;
254   return this;
255 }
256
257 CommPtr Comm::set_dst_data(void** buff)
258 {
259   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
260              __FUNCTION__);
261   xbt_assert(src_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
262   dst_buff_ = buff;
263   return this;
264 }
265
266 CommPtr Comm::set_dst_data(void** buff, size_t size)
267 {
268   xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
269              __FUNCTION__);
270
271   xbt_assert(src_buff_ == nullptr, "Cannot set the src and dst buffers at the same time");
272   dst_buff_      = buff;
273   dst_buff_size_ = size;
274   return this;
275 }
276
277 CommPtr Comm::set_payload_size(uint64_t bytes)
278 {
279   set_remaining(bytes);
280   if (pimpl_) {
281     boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->set_size(bytes);
282   }
283   return this;
284 }
285
286 Actor* Comm::get_sender() const
287 {
288   kernel::actor::ActorImplPtr sender = nullptr;
289   if (pimpl_)
290     sender = boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->src_actor_;
291   return sender ? sender->get_ciface() : nullptr;
292 }
293
294 Actor* Comm::get_receiver() const
295 {
296   kernel::actor::ActorImplPtr receiver = nullptr;
297   if (pimpl_)
298     receiver = boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->dst_actor_;
299   return receiver ? receiver->get_ciface() : nullptr;
300 }
301
302 bool Comm::is_assigned() const
303 {
304   return (pimpl_ && boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->is_assigned()) ||
305          mailbox_ != nullptr;
306 }
307
308 Comm* Comm::do_start()
309 {
310   xbt_assert(get_state() == State::INITED || get_state() == State::STARTING,
311              "You cannot use %s() once your communication started (not implemented)", __FUNCTION__);
312   if (get_source() != nullptr || get_destination() != nullptr) {
313     xbt_assert(is_assigned(), "When either from_ or to_ is specified, both must be.");
314     xbt_assert(src_buff_ == nullptr && dst_buff_ == nullptr,
315                "Direct host-to-host communications cannot carry any data.");
316     XBT_DEBUG("host-to-host Comm. Pimpl already created and set, just start it.");
317     on_start(*this);
318     on_this_start(*this);
319     kernel::actor::simcall_answered([this] {
320       pimpl_->set_state(kernel::activity::State::READY);
321       boost::static_pointer_cast<kernel::activity::CommImpl>(pimpl_)->start();
322     });
323   } else if (src_buff_ != nullptr) { // Sender side
324     on_send(*this);
325     on_this_send(*this);
326     kernel::actor::CommIsendSimcall observer{sender_,
327                                              mailbox_->get_impl(),
328                                              remains_,
329                                              rate_,
330                                              static_cast<unsigned char*>(src_buff_),
331                                              src_buff_size_,
332                                              match_fun_,
333                                              clean_fun_,
334                                              copy_data_function_,
335                                              get_data<void>(),
336                                              detached_};
337     pimpl_ = kernel::actor::simcall_answered([&observer] { return kernel::activity::CommImpl::isend(&observer); },
338                                              &observer);
339   } else if (dst_buff_ != nullptr) { // Receiver side
340     xbt_assert(not detached_, "Receive cannot be detached");
341     on_recv(*this);
342     on_this_recv(*this);
343     kernel::actor::CommIrecvSimcall observer{receiver_,
344                                              mailbox_->get_impl(),
345                                              static_cast<unsigned char*>(dst_buff_),
346                                              &dst_buff_size_,
347                                              match_fun_,
348                                              copy_data_function_,
349                                              get_data<void>(),
350                                              rate_};
351     pimpl_ = kernel::actor::simcall_answered([&observer] { return kernel::activity::CommImpl::irecv(&observer); },
352                                              &observer);
353   } else {
354     xbt_die("Cannot start a communication before specifying whether we are the sender or the receiver");
355   }
356
357   if (suspended_)
358     pimpl_->suspend();
359
360   if (not detached_) {
361     pimpl_->set_iface(this);
362     pimpl_->set_actor(sender_);
363     // Only throw the signal when both sides are here and the status is READY
364     if (pimpl_->get_state() != kernel::activity::State::WAITING) {
365       on_start(*this);
366       on_this_start(*this);
367     }
368   }
369
370   state_ = State::STARTED;
371   return this;
372 }
373
374 Comm* Comm::detach()
375 {
376   xbt_assert(state_ == State::INITED || state_ == State::STARTING,
377              "You cannot use %s() once your communication is %s (not implemented)", __FUNCTION__, get_state_str());
378   xbt_assert(dst_buff_ == nullptr && dst_buff_size_ == 0, "You can only detach sends, not recvs");
379   detached_ = true;
380   start();
381   return this;
382 }
383
384 ssize_t Comm::test_any(const std::vector<CommPtr>& comms)
385 {
386   std::vector<ActivityPtr> activities;
387   for (const auto& comm : comms)
388     activities.push_back(boost::dynamic_pointer_cast<Activity>(comm));
389   return Activity::test_any(activities);
390 }
391
392 /** @brief Block the calling actor until the communication is finished, or until timeout
393  *
394  * On timeout, an exception is thrown and the communication is invalidated.
395  *
396  * @param timeout the amount of seconds to wait for the comm termination.
397  *                Negative values denote infinite wait times. 0 as a timeout returns immediately. */
398 Comm* Comm::wait_for(double timeout)
399 {
400   XBT_DEBUG("Calling Comm::wait_for with state %s", get_state_str());
401   kernel::actor::ActorImpl* issuer = nullptr;
402   switch (state_) {
403     case State::FINISHED:
404       break;
405     case State::FAILED:
406       throw NetworkFailureException(XBT_THROW_POINT, "Cannot wait for a failed communication");
407     case State::INITED:
408     case State::STARTING: // It's not started yet. Do it in one simcall if it's a regular communication
409       if (get_source() != nullptr || get_destination() != nullptr) {
410         return start()->wait_for(timeout); // In the case of host2host comm, do it in two simcalls
411       } else if (src_buff_ != nullptr) {
412         on_send(*this);
413         on_this_send(*this);
414         send(sender_, mailbox_, remains_, rate_, src_buff_, src_buff_size_, match_fun_, copy_data_function_,
415              get_data<void>(), timeout);
416
417       } else { // Receiver
418         on_recv(*this);
419         on_this_recv(*this);
420         recv(receiver_, mailbox_, dst_buff_, &dst_buff_size_, match_fun_, copy_data_function_, get_data<void>(),
421              timeout, rate_);
422       }
423       break;
424     case State::STARTED:
425       try {
426         issuer = kernel::actor::ActorImpl::self();
427         kernel::actor::ActivityWaitSimcall observer{issuer, pimpl_.get(), timeout};
428         if (kernel::actor::simcall_blocking(
429                 [&observer] { observer.get_activity()->wait_for(observer.get_issuer(), observer.get_timeout()); },
430                 &observer)) {
431           throw TimeoutException(XBT_THROW_POINT, "Timeouted");
432         }
433       } catch (const NetworkFailureException& e) {
434         issuer->simcall_.observer_ = nullptr; // Comm failed on network failure, reset the observer to nullptr
435         complete(State::FAILED);
436         e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
437       }
438       break;
439
440     case State::CANCELED:
441       throw CancelException(XBT_THROW_POINT, "Communication canceled");
442
443     default:
444       THROW_IMPOSSIBLE;
445   }
446   complete(State::FINISHED);
447   return this;
448 }
449
450 ssize_t Comm::wait_any_for(const std::vector<CommPtr>& comms, double timeout)
451 {
452   std::vector<ActivityPtr> activities;
453   for (const auto& comm : comms)
454     activities.push_back(boost::dynamic_pointer_cast<Activity>(comm));
455   ssize_t changed_pos;
456   try {
457     changed_pos = Activity::wait_any_for(activities, timeout);
458   } catch (const NetworkFailureException& e) {
459     changed_pos = -1;
460     for (auto c : comms) {
461       if (c->pimpl_->get_state() == kernel::activity::State::FAILED) {
462         c->complete(State::FAILED);
463       }
464     }
465     e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
466   }
467   return changed_pos;
468 }
469
470 void Comm::wait_all(const std::vector<CommPtr>& comms)
471 {
472   // TODO: this should be a simcall or something
473   for (auto& comm : comms)
474     comm->wait();
475 }
476
477 size_t Comm::wait_all_for(const std::vector<CommPtr>& comms, double timeout)
478 {
479   if (timeout < 0.0) {
480     wait_all(comms);
481     return comms.size();
482   }
483
484   double deadline = Engine::get_clock() + timeout;
485   std::vector<CommPtr> waited_comm(1, nullptr);
486   for (size_t i = 0; i < comms.size(); i++) {
487     double wait_timeout = std::max(0.0, deadline - Engine::get_clock());
488     waited_comm[0]      = comms[i];
489     // Using wait_any_for() here (and not wait_for) because we don't want comms to be invalidated on timeout
490     if (wait_any_for(waited_comm, wait_timeout) == -1) {
491       XBT_DEBUG("Timeout (%g): i = %zu", wait_timeout, i);
492       return i;
493     }
494   }
495   return comms.size();
496 }
497 } // namespace simgrid::s4u
498 /* **************************** Public C interface *************************** */
499 void sg_comm_detach(sg_comm_t comm, void (*clean_function)(void*))
500 {
501   comm->detach(clean_function);
502   comm->unref();
503 }
504 void sg_comm_unref(sg_comm_t comm)
505 {
506   comm->unref();
507 }
508 int sg_comm_test(sg_comm_t comm)
509 {
510   bool finished = comm->test();
511   if (finished)
512     comm->unref();
513   return finished;
514 }
515
516 sg_error_t sg_comm_wait(sg_comm_t comm)
517 {
518   return sg_comm_wait_for(comm, -1);
519 }
520
521 sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout)
522 {
523   sg_error_t status = SG_OK;
524
525   simgrid::s4u::CommPtr s4u_comm(comm, false);
526   try {
527     s4u_comm->wait_for(timeout);
528   } catch (const simgrid::TimeoutException&) {
529     status = SG_ERROR_TIMEOUT;
530   } catch (const simgrid::CancelException&) {
531     status = SG_ERROR_CANCELED;
532   } catch (const simgrid::NetworkFailureException&) {
533     status = SG_ERROR_NETWORK;
534   }
535   return status;
536 }
537
538 void sg_comm_wait_all(sg_comm_t* comms, size_t count)
539 {
540   sg_comm_wait_all_for(comms, count, -1);
541 }
542
543 size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout)
544 {
545   std::vector<simgrid::s4u::CommPtr> s4u_comms;
546   for (size_t i = 0; i < count; i++)
547     s4u_comms.emplace_back(comms[i], false);
548
549   size_t pos = simgrid::s4u::Comm::wait_all_for(s4u_comms, timeout);
550   for (size_t i = pos; i < count; i++)
551     s4u_comms[i]->add_ref();
552   return pos;
553 }
554
555 ssize_t sg_comm_wait_any(sg_comm_t* comms, size_t count)
556 {
557   return sg_comm_wait_any_for(comms, count, -1);
558 }
559
560 ssize_t sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout)
561 {
562   std::vector<simgrid::s4u::CommPtr> s4u_comms;
563   for (size_t i = 0; i < count; i++)
564     s4u_comms.emplace_back(comms[i], false);
565
566   ssize_t pos = simgrid::s4u::Comm::wait_any_for(s4u_comms, timeout);
567   for (size_t i = 0; i < count; i++) {
568     if (pos != -1 && static_cast<size_t>(pos) != i)
569       s4u_comms[i]->add_ref();
570   }
571   return pos;
572 }