1 /* Copyright (c) 2013-2017. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include <simgrid/plugins/live_migration.h>
7 #include <simgrid/s4u.hpp>
8 #include <simgrid/s4u/VirtualMachine.hpp>
9 #include <src/instr/instr_private.hpp>
10 #include <src/plugins/vm/VirtualMachineImpl.hpp>
11 #include <src/plugins/vm/VmLiveMigration.hpp>
// Logging category for this file (named vm_live_migration); presumably the default category used by the
// XBT_DEBUG / XBT_VERB / XBT_WARN calls below -- confirm against the XBT logging documentation.
14 XBT_LOG_NEW_DEFAULT_CATEGORY(vm_live_migration, "S4U virtual machines live migration");
/**
 * Receiver side of a live migration.
 *
 * Reads messages from `mbox` until one equals the stage-3 finalize name built from the VM, source host and
 * destination host names. It then asserts that the VM is suspended, clears the VM's isMigrating flag, emits
 * tracing events when VM tracing is enabled, and finally posts a "__mig_stage4:..." acknowledgement on `mbox_ctl`.
 */
19 void MigrationRx::operator()()
21 XBT_DEBUG("mig: rx_start");
22 bool received_finalize = false;
// Payload that marks the end of stage 3: "__mig_stage3:<vm>(<src>-<dst>)".
24 std::string finalize_task_name =
25 std::string("__mig_stage3:") + vm_->getCname() + "(" + src_pm_->getCname() + "-" + dst_pm_->getCname() + ")";
// Keep receiving from mbox until a payload equal to the finalize name shows up.
27 while (not received_finalize) {
// The mailbox hands back an untyped pointer; payloads are expected to be heap-allocated std::string objects.
28 std::string* payload = static_cast<std::string*>(mbox->get());
30 if (finalize_task_name == *payload)
31 received_finalize = true;
36 // Here Stage 1, 2 and 3 have been performed.
37 // Hence complete the migration
39 /* Update the vm location */
40 /* precopy migration makes the VM temporarily paused */
41 xbt_assert(vm_->getState() == SURF_VM_STATE_SUSPENDED);
43 /* Update the vm location and resume it */
47 // Now the VM is running on the new host (the migration is completed) (even if the SRC crash)
48 vm_->pimpl_vm_->isMigrating = false;
49 XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", vm_->getCname(), src_pm_->getCname(), dst_pm_->getCname());
// Tracing only: record the move as an "M" event on the MSG_VM_LINK link and replace the VM's trace container
// by a new one whose parent is the destination host's container.
51 if (TRACE_msg_vm_is_enabled()) {
// Function-local static, hence shared by every invocation; its decimal representation is used as the link key.
52 static long long int counter = 0;
53 std::string key = std::to_string(counter);
// The link starts from the container the VM has before being moved.
57 container_t msg = simgrid::instr::Container::byName(vm_->getName());
58 simgrid::instr::Container::getRoot()->getLink("MSG_VM_LINK")->startEvent(msg, "M", key);
60 // destroy existing container of this vm
61 container_t existing_container = simgrid::instr::Container::byName(vm_->getName());
62 existing_container->removeFromParent();
63 delete existing_container;
65 // create new container on the new_host location
// The pointer returned by new is not kept: the container is looked up again by name right below
// (presumably the constructor registers it -- confirm).
66 new simgrid::instr::Container(vm_->getCname(), "MSG_VM", simgrid::instr::Container::byName(dst_pm_->getName()));
// The link ends on the freshly created container, reusing the same key as the start event.
69 msg = simgrid::instr::Container::byName(vm_->getName());
70 simgrid::instr::Container::getRoot()->getLink("MSG_VM_LINK")->endEvent(msg, "M", key);
72 // Inform the SRC that the migration has been correctly performed
// The acknowledgement is a heap-allocated string "__mig_stage4:<vm>(<src>-<dst>)" sent with a size of 0;
// presumably the reader of the control mailbox (see sg_vm_migrate) is the one deleting it -- confirm.
73 std::string* payload = new std::string("__mig_stage4:");
74 *payload = *payload + vm_->getCname() + "(" + src_pm_->getCname() + "-" + dst_pm_->getCname() + ")";
76 mbox_ctl->put(payload, 0);
78 XBT_DEBUG("mig: rx_done");
/**
 * Convert an amount of computation into a size of updated memory, bounded by a cap.
 *
 * @param computed amount of computation (callers pass values obtained from sg_vm_lookup_computed_flops())
 * @param dp_rate  factor multiplied with `computed` to obtain a size
 * @param dp_cap   upper bound applied to the computed size
 * @return presumably the clamped size `updated_size` -- confirm
 */
81 static sg_size_t get_updated_size(double computed, double dp_rate, sg_size_t dp_cap)
// The floating-point product is truncated when converted to sg_size_t.
83 sg_size_t updated_size = static_cast<sg_size_t>(computed * dp_rate);
84 XBT_DEBUG("updated_size %llu dp_rate %f", updated_size, dp_rate);
// Never report more than dp_cap.
85 if (updated_size > dp_cap) {
86 updated_size = dp_cap;
/**
 * Send one migration message of `size` bytes on `mbox` and log the speed that was achieved.
 *
 * @param size         size given to the mailbox for this message
 * @param stage        migration stage number, embedded in the message name ("__mig_stage<stage>:<vm>(<src>-<dst>...")
 * @param stage2_round round index; in the visible code it is only used in a debug log
 * @param mig_speed    rate given to setRate() on the put_init() path
 * @param timeout      timeout given to wait() on the put_init() path
 * @return presumably the number of bytes actually sent (`sent` starts at `size`, and callers compare the result
 *         with the size they requested) -- confirm
 */
92 sg_size_t MigrationTx::sendMigrationData(sg_size_t size, int stage, int stage2_round, double mig_speed, double timeout)
94 sg_size_t sent = size;
// The message payload is a heap-allocated string naming the stage, the VM and both hosts.
95 std::string* msg = new std::string("__mig_stage");
96 *msg = *msg + std::to_string(stage) + ":" + vm_->getCname() + "(" + src_pm_->getCname() + "-" + dst_pm_->getCname() +
// Simulated time just before the transfer, used to compute the duration below.
99 double clock_sta = s4u::Engine::getClock();
101 s4u::Activity* comm = nullptr;
// Rate-limited send, bounded by `timeout`.
104 comm = mbox->put_init(msg, size)->setRate(mig_speed)->wait(timeout);
// Plain asynchronous send, waited for without a timeout argument.
106 comm = mbox->put_async(msg, size)->wait();
107 } catch (xbt_ex& e) {
// NOTE(review): comm is only assigned once the whole put...->wait() chain has returned, so when wait() itself
// throws, comm still holds its initial nullptr value -- confirm that it cannot be dereferenced null here.
109 sg_size_t remaining = static_cast<sg_size_t>(comm->getRemains());
110 XBT_VERB("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu", timeout, remaining, size);
115 double clock_end = s4u::Engine::getClock();
116 double duration = clock_end - clock_sta;
// Floating-point division: a zero duration gives inf (or NaN when size is 0 too). In the visible code the value
// is only used in the debug logs below.
117 double actual_speed = size / duration;
// Two log formats: one that includes the round number, one that does not.
120 XBT_DEBUG("mig-stage%d.%d: sent %llu duration %f actual_speed %f (target %f)", stage, stage2_round, size, duration,
121 actual_speed, mig_speed);
123 XBT_DEBUG("mig-stage%d: sent %llu duration %f actual_speed %f (target %f)", stage, size, duration, actual_speed,
/**
 * Sender side of a live migration, organised in three stages (see the stage comments below):
 *  - Stage 1: send the whole RAM (`ramsize` bytes), without timeout;
 *  - Stage 2: repeatedly send the memory estimated as updated in the meantime, until the remaining size drops
 *    below a threshold computed as (measured bandwidth * max_downtime), or until a send times out;
 *  - Stage 3: stop the dirty page tracking and send whatever remains, without timeout.
 *
 * When a send raises an xbt_ex, the visible handlers stop the dirty page tracking.
 */
129 void MigrationTx::operator()()
131 XBT_DEBUG("mig: tx_start");
// Migration parameters, read once from the VM and from the host it currently runs on.
133 double host_speed = vm_->getPm()->getSpeed();
134 const sg_size_t ramsize = vm_->getRamsize();
// dp_rate = migration_speed * dirty_page_intensity / host_speed, falling back to 1 when host_speed is 0.
// It is later multiplied by an amount of computed flops (in get_updated_size()) to estimate an updated size.
135 const double dp_rate =
136 host_speed ? (sg_vm_get_migration_speed(vm_) * sg_vm_get_dirty_page_intensity(vm_)) / host_speed : 1;
// Upper bound on the size estimated as updated (passed as dp_cap to get_updated_size()).
137 const sg_size_t dp_cap = sg_vm_get_working_set_memory(vm_);
138 const double mig_speed = sg_vm_get_migration_speed(vm_);
139 double max_downtime = sg_vm_get_max_downtime(vm_);
// Time budget of the migration: the duration of each send is subtracted from it below.
141 double mig_timeout = 10000000.0;
142 bool skip_stage2 = false;
// Amount still to transfer; starts at the full RAM size.
144 size_t remaining_size = ramsize;
// Stage 2 ends once remaining_size gets below this value. Note: initialised from a floating literal, i.e. to 0.
145 size_t threshold = 0.0;
147 /* check parameters */
149 XBT_WARN("migrate a VM, but ramsize is zero");
151 if (max_downtime <= 0) {
152 XBT_WARN("use the default max_downtime value 30ms");
156 /* Stage1: send all memory pages to the destination. */
157 XBT_DEBUG("mig-stage1: remaining_size %zu", remaining_size);
158 sg_vm_start_dirty_page_tracking(vm_);
160 double computed_during_stage1 = 0;
// Simulated time before the stage-1 transfer; used for both the timeout budget and the bandwidth estimate.
161 double clock_prev_send = s4u::Engine::getClock();
164 /* At stage 1, we do not need timeout. We have to send all the memory pages even though the duration of this
165 * transfer exceeds the timeout value. */
166 XBT_VERB("Stage 1: Gonna send %llu bytes", ramsize);
// -1 is passed as the timeout value for this stage.
167 sg_size_t sent = sendMigrationData(ramsize, 1, 0, mig_speed, -1);
168 remaining_size -= sent;
// Value looked up right after the stage-1 transfer; it is turned into an updated size at round 0 of stage 2.
169 computed_during_stage1 = sg_vm_lookup_computed_flops(vm_);
// Fewer bytes sent than requested is treated as a timeout.
171 if (sent < ramsize) {
172 XBT_VERB("mig-stage1: timeout, force moving to stage 3");
174 } else if (sent > ramsize)
177 } catch (xbt_ex& e) {
178 // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data code)
179 // Stop the dirty page tracking and return (there is no memory space to release)
180 sg_vm_stop_dirty_page_tracking(vm_);
// Charge the duration of stage 1 to the time budget.
184 double clock_post_send = s4u::Engine::getClock();
185 mig_timeout -= (clock_post_send - clock_prev_send);
186 if (mig_timeout < 0) {
187 XBT_VERB("The duration of stage 1 exceeds the timeout value, skip stage 2");
191 /* estimate bandwidth */
// Bandwidth observed during stage 1; the threshold is the amount transferable within max_downtime at that
// bandwidth (the double product is implicitly converted to size_t).
192 double bandwidth = ramsize / (clock_post_send - clock_prev_send);
193 threshold = bandwidth * max_downtime;
194 XBT_DEBUG("actual bandwidth %f (MB/s), threshold %zu", bandwidth / 1024 / 1024, threshold);
196 /* Stage2: send update pages iteratively until the size of remaining states becomes smaller than threshold value. */
197 if (not skip_stage2) {
199 int stage2_round = 0;
// Size estimated as updated since the previous lookup, bounded by dp_cap.
201 sg_size_t updated_size = 0;
202 if (stage2_round == 0) {
203 /* just after stage1, nothing has been updated. But, we have to send the data updated during stage1 */
204 updated_size = get_updated_size(computed_during_stage1, dp_rate, dp_cap);
// Later rounds: use a fresh lookup of the computed flops.
206 double computed = sg_vm_lookup_computed_flops(vm_);
207 updated_size = get_updated_size(computed, dp_rate, dp_cap);
210 XBT_DEBUG("mig-stage 2:%d updated_size %llu computed_during_stage1 %f dp_rate %f dp_cap %llu", stage2_round,
211 updated_size, computed_during_stage1, dp_rate, dp_cap);
213 /* Check whether the remaining size is below the threshold value. If so, move to stage 3. */
// The updated memory has to be (re)sent, so it is added to what remains.
214 remaining_size += updated_size;
215 XBT_DEBUG("mig-stage2.%d: remaining_size %zu (%s threshold %zu)", stage2_round, remaining_size,
216 (remaining_size < threshold) ? "<" : ">", threshold);
217 if (remaining_size < threshold)
// Timestamp of this round's send (same name as the stage-1 variable, but a separate declaration).
221 double clock_prev_send = s4u::Engine::getClock();
223 XBT_DEBUG("Stage 2, gonna send %llu", updated_size);
// Unlike stages 1 and 3, this send is bounded by what is left of the time budget.
224 sent = sendMigrationData(updated_size, 2, stage2_round, mig_speed, mig_timeout);
225 } catch (xbt_ex& e) {
226 // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data
228 // Stop the dirty page tracking and return (there is no memory space to release)
229 sg_vm_stop_dirty_page_tracking(vm_);
232 double clock_post_send = s4u::Engine::getClock();
234 if (sent == updated_size) {
235 /* timeout did not happen */
// Re-estimate the bandwidth, and thus the threshold, from this round's transfer.
236 double bandwidth = updated_size / (clock_post_send - clock_prev_send);
237 threshold = bandwidth * max_downtime;
238 XBT_DEBUG("actual bandwidth %f, threshold %zu", bandwidth / 1024 / 1024, threshold);
239 remaining_size -= sent;
// Charge this round to the time budget; a round that completed is expected to leave some budget.
241 mig_timeout -= (clock_post_send - clock_prev_send);
242 xbt_assert(mig_timeout > 0);
244 } else if (sent < updated_size) {
245 /* When timeout happens, we move to stage 3. The size of memory pages
246 * updated before timeout must be added to the remaining size. */
247 XBT_VERB("mig-stage2.%d: timeout, force moving to stage 3. sent %llu / %llu, eta %lf", stage2_round, sent,
248 updated_size, (clock_post_send - clock_prev_send));
// Account for the part that was sent, then add the size estimated as updated during the partial send.
249 remaining_size -= sent;
251 double computed = sg_vm_lookup_computed_flops(vm_);
252 updated_size = get_updated_size(computed, dp_rate, dp_cap);
253 remaining_size += updated_size;
260 /* Stage3: stop the VM and copy the rest of states. */
261 XBT_DEBUG("mig-stage3: remaining_size %zu", remaining_size);
263 sg_vm_stop_dirty_page_tracking(vm_);
266 XBT_DEBUG("Stage 3: Gonna send %zu bytes", remaining_size);
// As in stage 1, -1 is passed as the timeout value; the returned size is not used.
267 sendMigrationData(remaining_size, 3, 0, mig_speed, -1);
268 } catch (xbt_ex& e) {
269 // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data code)
270 // Stop the dirty page tracking and return (there is no memory space to release)
275 // At that point the Migration is considered valid for the SRC node but remind that the DST side should relocate
276 // effectively the VM on the DST node.
277 XBT_DEBUG("mig: tx_done");
/**
 * Migrate `vm` from the host it currently runs on to `dst_pm`.
 *
 * Creates one receiver actor (MigrationRx) on `dst_pm` and one sender actor (MigrationTx) on the source host,
 * then reads one message from the control mailbox "__mbox_mig_ctl:<vm>(<src>-<dst>)" before clearing the VM's
 * isMigrating flag.
 *
 * @param vm     VM to migrate; it must be running and must not already be migrating
 * @param dst_pm destination host
 *
 * Raises vm_error (through THROWF) when the VM is not running or is already migrating; the two other error
 * messages below indicate that it is also raised when the source or the destination host is offline.
 */
283 void sg_vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm)
// The source is the host the VM currently runs on.
285 simgrid::s4u::Host* src_pm = vm->getPm();
288 THROWF(vm_error, 0, "Cannot migrate VM '%s' from host '%s', which is offline.", vm->getCname(), src_pm->getCname());
290 THROWF(vm_error, 0, "Cannot migrate VM '%s' to host '%s', which is offline.", vm->getCname(), dst_pm->getCname());
291 if (vm->getState() != SURF_VM_STATE_RUNNING)
292 THROWF(vm_error, 0, "Cannot migrate VM '%s' that is not running yet.", vm->getCname());
293 if (vm->isMigrating())
294 THROWF(vm_error, 0, "Cannot migrate VM '%s' that is already migrating.", vm->getCname());
// Mark the VM as migrating; the flag is cleared again at the end of this function (and by MigrationRx).
296 vm->pimpl_vm_->isMigrating = true;
// Actor names follow the pattern "__pr_mig_{rx,tx}:<vm>(<src>-<dst>)".
298 std::string rx_name =
299 std::string("__pr_mig_rx:") + vm->getCname() + "(" + src_pm->getCname() + "-" + dst_pm->getCname() + ")";
300 std::string tx_name =
301 std::string("__pr_mig_tx:") + vm->getCname() + "(" + src_pm->getCname() + "-" + dst_pm->getCname() + ")";
// The receiver is created on the destination host, the sender on the source host.
303 simgrid::s4u::ActorPtr rx =
304 simgrid::s4u::Actor::createActor(rx_name.c_str(), dst_pm, simgrid::vm::MigrationRx(vm, dst_pm));
305 simgrid::s4u::ActorPtr tx =
306 simgrid::s4u::Actor::createActor(tx_name.c_str(), src_pm, simgrid::vm::MigrationTx(vm, dst_pm));
308 /* wait until the migration has finished or an error has occurred */
309 XBT_DEBUG("wait for reception of the final ACK (i.e. migration has been correctly performed");
310 simgrid::s4u::MailboxPtr mbox_ctl = simgrid::s4u::Mailbox::byName(
311 std::string("__mbox_mig_ctl:") + vm->getCname() + "(" + src_pm->getCname() + "-" + dst_pm->getCname() + ")");
// The received payload is a heap-allocated std::string (presumably the "__mig_stage4:" acknowledgement posted by
// MigrationRx); only its arrival matters, so it is deleted right away.
312 delete static_cast<std::string*>(mbox_ctl->get());
317 vm->pimpl_vm_->isMigrating = false;