Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
0a8c889a61824ec583b16c0006f6dd72ff3d1007
[simgrid.git] / src / surf / network_ib.cpp
1 /* Copyright (c) 2014-2020. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include "src/surf/network_ib.hpp"
7 #include "simgrid/sg_config.hpp"
8 #include "src/surf/HostImpl.hpp"
9 #include "src/surf/xml/platf.hpp"
10 #include "surf/surf.hpp"
11
12 #include <boost/algorithm/string/classification.hpp>
13 #include <boost/algorithm/string/split.hpp>
14
15 XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(surf_network);
16
17 static void IB_create_host_callback(simgrid::s4u::Host const& host)
18 {
19   using simgrid::kernel::resource::IBNode;
20   using simgrid::kernel::resource::NetworkIBModel;
21
22   static int id=0;
23
24   ((NetworkIBModel*)surf_network_model)->active_nodes.emplace(host.get_name(), IBNode(id));
25   id++;
26 }
27
28 static void IB_action_state_changed_callback(simgrid::kernel::resource::NetworkAction& action,
29                                              simgrid::kernel::resource::Action::State /*previous*/)
30 {
31   using simgrid::kernel::resource::IBNode;
32   using simgrid::kernel::resource::NetworkIBModel;
33
34   if (action.get_state() != simgrid::kernel::resource::Action::State::FINISHED)
35     return;
36   std::pair<IBNode*, IBNode*> pair = ((NetworkIBModel*)surf_network_model)->active_comms[&action];
37   XBT_DEBUG("IB callback - action %p finished", &action);
38
39   ((NetworkIBModel*)surf_network_model)->updateIBfactors(&action, pair.first, pair.second, 1);
40
41   ((NetworkIBModel*)surf_network_model)->active_comms.erase(&action);
42 }
43
44 static void IB_action_init_callback(simgrid::kernel::resource::NetworkAction& action)
45 {
46   simgrid::kernel::resource::NetworkIBModel* ibModel = (simgrid::kernel::resource::NetworkIBModel*)surf_network_model;
47   simgrid::kernel::resource::IBNode* act_src         = &ibModel->active_nodes.at(action.get_src().get_name());
48   simgrid::kernel::resource::IBNode* act_dst         = &ibModel->active_nodes.at(action.get_dst().get_name());
49
50   ibModel->active_comms[&action] = std::make_pair(act_src, act_dst);
51   ibModel->updateIBfactors(&action, act_src, act_dst, 0);
52 }
53
54 /*********
55  * Model *
56  *********/
57
58 /************************************************************************/
59 /* New model based on MPI contention model for Infiniband platforms */
60 /************************************************************************/
61 /* @Inproceedings{mescal_vienne_phd, */
62 /*  author={Jérôme Vienne}, */
63 /*  title={prédiction de performances d’applications de calcul haute performance sur réseau Infiniband}, */
64 /*  address={Grenoble FRANCE}, */
65 /*  month=june, */
66 /*  year={2010} */
67 /*  } */
68 void surf_network_model_init_IB()
69 {
70   xbt_assert(surf_network_model == nullptr, "Cannot set the network model twice");
71
72   surf_network_model = new simgrid::kernel::resource::NetworkIBModel();
73   simgrid::s4u::Link::on_communication_state_change.connect(IB_action_state_changed_callback);
74   simgrid::s4u::Link::on_communicate.connect(IB_action_init_callback);
75   simgrid::s4u::Host::on_creation.connect(IB_create_host_callback);
76   simgrid::config::set_default<double>("network/weight-S", 8775);
77 }
78
79 namespace simgrid {
80 namespace kernel {
81 namespace resource {
82
83 NetworkIBModel::NetworkIBModel() : NetworkSmpiModel()
84 {
85   /* Do not add this into all_existing_models: our ancestor already does so */
86
87   std::string IB_factors_string = config::get_value<std::string>("smpi/IB-penalty-factors");
88   std::vector<std::string> radical_elements;
89   boost::split(radical_elements, IB_factors_string, boost::is_any_of(";"));
90
91   surf_parse_assert(radical_elements.size() == 3, "smpi/IB-penalty-factors should be provided and contain 3 "
92                                                   "elements, semi-colon separated. Example: 0.965;0.925;1.35");
93
94   try {
95     Be = std::stod(radical_elements.front());
96   } catch (const std::invalid_argument& ia) {
97     throw std::invalid_argument(std::string("First part of smpi/IB-penalty-factors is not numerical:") + ia.what());
98   }
99
100   try {
101     Bs = std::stod(radical_elements.at(1));
102   } catch (const std::invalid_argument& ia) {
103     throw std::invalid_argument(std::string("Second part of smpi/IB-penalty-factors is not numerical:") + ia.what());
104   }
105
106   try {
107     ys = std::stod(radical_elements.back());
108   } catch (const std::invalid_argument& ia) {
109     throw std::invalid_argument(std::string("Third part of smpi/IB-penalty-factors is not numerical:") + ia.what());
110   }
111 }
112
113 void NetworkIBModel::computeIBfactors(IBNode* root) const
114 {
115   double num_comm_out    = root->ActiveCommsUp.size();
116   double max_penalty_out = 0.0;
117   // first, compute all outbound penalties to get their max
118   for (ActiveComm const* comm : root->ActiveCommsUp) {
119     double my_penalty_out = 1.0;
120
121     if (num_comm_out != 1) {
122       if (comm->destination->nbActiveCommsDown > 2) // number of comms sent to the receiving node
123         my_penalty_out = num_comm_out * Bs * ys;
124       else
125         my_penalty_out = num_comm_out * Bs;
126     }
127
128     max_penalty_out = std::max(max_penalty_out, my_penalty_out);
129   }
130
131   for (ActiveComm* comm : root->ActiveCommsUp) {
132     // compute inbound penalty
133     double my_penalty_in = 1.0;
134     int nb_comms         = comm->destination->nbActiveCommsDown; // total number of incoming comms
135     if (nb_comms != 1)
136       my_penalty_in = (comm->destination->ActiveCommsDown)[root]        // number of comm sent to dest by root node
137                       * Be * comm->destination->ActiveCommsDown.size(); // number of different nodes sending to dest
138
139     double penalty = std::max(my_penalty_in, max_penalty_out);
140
141     double rate_before_update = comm->action->get_bound();
142     // save initial rate of the action
143     if (comm->init_rate == -1)
144       comm->init_rate = rate_before_update;
145
146     double penalized_bw = num_comm_out ? comm->init_rate / penalty : comm->init_rate;
147
148     if (not double_equals(penalized_bw, rate_before_update, sg_surf_precision)) {
149       XBT_DEBUG("%d->%d action %p penalty updated : bw now %f, before %f , initial rate %f", root->id,
150                 comm->destination->id, comm->action, penalized_bw, comm->action->get_bound(), comm->init_rate);
151       get_maxmin_system()->update_variable_bound(comm->action->get_variable(), penalized_bw);
152     } else {
153       XBT_DEBUG("%d->%d action %p penalty not updated : bw %f, initial rate %f", root->id, comm->destination->id,
154                 comm->action, penalized_bw, comm->init_rate);
155     }
156   }
157   XBT_DEBUG("Finished computing IB penalties");
158 }
159
160 void NetworkIBModel::updateIBfactors_rec(IBNode* root, std::vector<bool>& updatedlist) const
161 {
162   if (not updatedlist[root->id]) {
163     XBT_DEBUG("IB - Updating rec %d", root->id);
164     computeIBfactors(root);
165     updatedlist[root->id] = true;
166     for (ActiveComm const* comm : root->ActiveCommsUp) {
167       if (not updatedlist[comm->destination->id])
168         updateIBfactors_rec(comm->destination, updatedlist);
169     }
170     for (std::map<IBNode*, int>::value_type const& comm : root->ActiveCommsDown) {
171       if (not updatedlist[comm.first->id])
172         updateIBfactors_rec(comm.first, updatedlist);
173     }
174   }
175 }
176
177 void NetworkIBModel::updateIBfactors(NetworkAction* action, IBNode* from, IBNode* to, int remove) const
178 {
179   if (from == to) // disregard local comms (should use loopback)
180     return;
181
182   if (remove) {
183     if (to->ActiveCommsDown[from] == 1)
184       to->ActiveCommsDown.erase(from);
185     else
186       to->ActiveCommsDown[from] -= 1;
187
188     to->nbActiveCommsDown--;
189     std::vector<ActiveComm*>::iterator it =
190         std::find_if(begin(from->ActiveCommsUp), end(from->ActiveCommsUp),
191                      [action](const ActiveComm* comm) { return comm->action == action; });
192     if (it != std::end(from->ActiveCommsUp)) {
193       delete *it;
194       from->ActiveCommsUp.erase(it);
195     }
196     action->unref();
197   } else {
198     action->ref();
199     ActiveComm* comm  = new ActiveComm();
200     comm->action      = action;
201     comm->destination = to;
202     from->ActiveCommsUp.push_back(comm);
203
204     to->ActiveCommsDown[from] += 1;
205     to->nbActiveCommsDown++;
206   }
207   XBT_DEBUG("IB - Updating %d", from->id);
208   std::vector<bool> updated(active_nodes.size(), false);
209   updateIBfactors_rec(from, updated);
210   XBT_DEBUG("IB - Finished updating %d", from->id);
211 }
212 } // namespace resource
213 } // namespace kernel
214 } // namespace simgrid