1 /* Copyright (c) 2013-2021. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include "src/plugins/vm/VmLiveMigration.hpp"
7 #include "simgrid/Exception.hpp"
8 #include "src/instr/instr_private.hpp"
9 #include "src/plugins/vm/VirtualMachineImpl.hpp"
10 #include "src/plugins/vm/VmHostExt.hpp"
// Default logging category of this file: "vm_live_migration", declared as a sub-category of "s4u".
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(vm_live_migration, s4u, "S4U virtual machines live migration");
// Definition of the static extension handle of VmMigrationExt. It is assigned by
// ensureVmMigrationExtInstalled() the first time that function finds it not valid.
16 xbt::Extension<s4u::Host, VmMigrationExt> VmMigrationExt::EXTENSION_ID;
18 void VmMigrationExt::ensureVmMigrationExtInstalled()
20 if (not EXTENSION_ID.valid())
21 EXTENSION_ID = s4u::Host::extension_create<VmMigrationExt>();
/**
 * Receiving side of a live migration.
 *
 * Reads messages from `mbox` until one equals the stage-3 marker "__mig_stage3:<vm>(<src>-<dst>)", then
 * ends the migration on the VM implementation, optionally records the move in the trace, and finally
 * puts a "__mig_stage4:<vm>(<src>-<dst>)" acknowledgement on `mbox_ctl`.
 */
24 void MigrationRx::operator()()
26 XBT_DEBUG("mig: rx_start");
27 bool received_finalize = false;
// Content of the message that marks the end of stage 3 for this VM and this (source, destination) pair.
29 std::string finalize_task_name =
30 std::string("__mig_stage3:") + vm_->get_cname() + "(" + src_pm_->get_cname() + "-" + dst_pm_->get_cname() + ")";
// Every received message is only compared with the marker; any other content is ignored.
32 while (not received_finalize) {
33 auto payload = mbox->get_unique<std::string>();
35 if (finalize_task_name == *payload)
36 received_finalize = true;
39 // Here Stage 1, 2 and 3 have been performed.
40 // Hence complete the migration
42 /* Update the vm location */
43 /* precopy migration makes the VM temporarily paused */
44 xbt_assert(vm_->get_state() == s4u::VirtualMachine::state::SUSPENDED);
46 /* Update the vm location and resume it */
// NOTE(review): no statement that relocates or resumes the VM is shown between the comment above and
// end_migration() in this excerpt -- confirm against the complete file.
50 // Now the VM is running on the new host (the migration is completed) (even if the SRC crash)
51 vm_->get_impl()->end_migration();
52 XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", vm_->get_cname(), src_pm_->get_cname(), dst_pm_->get_cname());
// Tracing only: emit a "VM_LINK" start event from the VM's current container, replace that container by a
// new one under the destination host's container, then emit the matching end event with the same key.
54 if (TRACE_vm_is_enabled()) {
// `counter` is function-static, so its value is kept between calls.
// NOTE(review): no increment of `counter` is shown in this excerpt -- confirm that keys differ per migration.
55 static long long int counter = 0;
56 std::string key = std::to_string(counter);
60 auto* msg = instr::Container::by_name(vm_->get_name());
61 instr::Container::get_root()->get_link("VM_LINK")->start_event(msg, "M", key);
63 // destroy existing container of this vm
64 instr::Container::by_name(vm_->get_name())->remove_from_parent();
66 // create new container on the new_host location
// The pointer returned by `new` is not kept; the container is looked up again by name just below.
67 new instr::Container(vm_->get_name(), "VM", instr::Container::by_name(dst_pm_->get_name()));
70 msg = instr::Container::by_name(vm_->get_name());
71 instr::Container::get_root()->get_link("VM_LINK")->end_event(msg, "M", key);
73 // Inform the SRC that the migration has been correctly performed
// The string is heap-allocated and handed to the mailbox as a raw pointer.
// NOTE(review): presumably the receiver takes ownership (sg_vm_migrate reads mbox_ctl with get_unique) -- confirm.
74 auto* payload = new std::string("__mig_stage4:");
75 *payload = *payload + vm_->get_cname() + "(" + src_pm_->get_cname() + "-" + dst_pm_->get_cname() + ")";
// Second argument is 0 -- presumably the size of the message, i.e. only its content matters; confirm.
77 mbox_ctl->put(payload, 0);
79 XBT_DEBUG("mig: rx_done");
82 static sg_size_t get_updated_size(double computed, double dp_rate, sg_size_t dp_cap)
84 auto updated_size = static_cast<sg_size_t>(computed * dp_rate);
85 XBT_DEBUG("updated_size %llu dp_rate %f", updated_size, dp_rate);
86 if (updated_size > dp_cap) {
87 updated_size = dp_cap;
/**
 * Send one migration message on `mbox` and log how the transfer went.
 *
 * The message content is "__mig_stage<stage>:<vm>(<src>-<dst>)".
 *
 * @param size         size handed to put_init() for the message
 * @param stage        stage number, embedded in the message content and in the log output
 * @param stage2_round round number, only used in the log output
 * @param mig_speed    rate handed to the communication through set_rate()
 * @param timeout      value handed to wait_for()
 * @return NOTE(review): the return statement is not shown in this excerpt; callers compare the result with
 *         the size they requested, so it is presumably the amount actually sent -- confirm.
 */
93 sg_size_t MigrationTx::sendMigrationData(sg_size_t size, int stage, int stage2_round, double mig_speed, double timeout)
95 sg_size_t sent = size;
// Heap-allocated message content, handed to the mailbox as a raw pointer.
96 auto* msg = new std::string("__mig_stage");
97 *msg = *msg + std::to_string(stage) + ":" + vm_->get_cname() + "(" + src_pm_->get_cname() + "-" +
98 dst_pm_->get_cname() + ")";
// Clock value before the transfer, used to compute its duration below.
100 double clock_sta = s4u::Engine::get_clock();
// put_init() returns the communication handle; it is configured and then waited on below.
102 s4u::CommPtr comm = mbox->put_init(msg, size);
104 comm->set_rate(mig_speed);
106 comm->wait_for(timeout);
// An Exception raised while waiting is reported as a timeout, together with the amount the
// communication reports as remaining.
107 } catch (const Exception&) {
108 auto remaining = static_cast<sg_size_t>(comm->get_remaining());
109 XBT_VERB("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu", timeout, remaining, size);
114 double clock_end = s4u::Engine::get_clock();
115 double duration = clock_end - clock_sta;
// NOTE(review): the speed and the "sent" figure logged below use the requested `size`, not `sent`.
// If the clock did not advance, `duration` is 0 and this is a floating-point division by zero; in the
// lines shown here the result is only used for logging.
116 double actual_speed = static_cast<double>(size) / duration;
// Two log formats: the first one also prints stage2_round.
119 XBT_DEBUG("mig-stage%d.%d: sent %llu duration %f actual_speed %f (target %f)", stage, stage2_round, size, duration,
120 actual_speed, mig_speed);
122 XBT_DEBUG("mig-stage%d: sent %llu duration %f actual_speed %f (target %f)", stage, size, duration, actual_speed,
/**
 * Sending side of a live migration (sg_vm_migrate starts it as an actor on the source host).
 *
 * All data goes through sendMigrationData(), in three stages:
 *  - stage 1 sends `ramsize` bytes after dirty-page tracking has been started;
 *  - stage 2 (unless skipped) keeps sending the amount computed by get_updated_size() for as long as
 *    `remaining_size` is above `threshold`, where threshold = measured bandwidth * max_downtime;
 *  - stage 3 stops dirty-page tracking and sends `remaining_size`.
 * `mig_timeout` starts at 10000000.0 and is decreased by the time spent in stage 1 and in each stage-2 round.
 */
128 void MigrationTx::operator()()
130 XBT_DEBUG("mig: tx_start");
132 double host_speed = vm_->get_pm()->get_speed();
133 const sg_size_t ramsize = vm_->get_ramsize();
// dp_rate = migration speed * dirty-page intensity / host speed; it is 1 when the host speed is 0.
134 const double dp_rate =
135 host_speed != 0.0 ? (sg_vm_get_migration_speed(vm_) * sg_vm_get_dirty_page_intensity(vm_)) / host_speed : 1;
// dp_cap is the upper bound later handed to get_updated_size().
136 const sg_size_t dp_cap = sg_vm_get_working_set_memory(vm_);
137 const double mig_speed = sg_vm_get_migration_speed(vm_);
138 double max_downtime = sg_vm_get_max_downtime(vm_);
// Time budget shared by stages 1 and 2.
140 double mig_timeout = 10000000.0;
141 bool skip_stage2 = false;
// Amount still to transfer; reduced by what each stage sent and increased by what get_updated_size() returns.
143 size_t remaining_size = ramsize;
// Clock values taken right before and right after each transfer.
145 double clock_prev_send;
146 double clock_post_send;
150 /* check parameters */
152 XBT_WARN("migrate a VM, but ramsize is zero");
154 if (max_downtime <= 0) {
155 XBT_WARN("use the default max_downtime value 30ms");
159 /* Stage1: send all memory pages to the destination. */
160 XBT_DEBUG("mig-stage1: remaining_size %zu", remaining_size);
161 sg_vm_start_dirty_page_tracking(vm_);
163 double computed_during_stage1 = 0;
164 clock_prev_send = s4u::Engine::get_clock();
167 /* At stage 1, we do not need timeout. We have to send all the memory pages even though the duration of this
168 * transfer exceeds the timeout value. */
169 XBT_VERB("Stage 1: Gonna send %llu bytes", ramsize);
// The timeout argument is -1 here, whereas stage 2 passes the remaining mig_timeout.
170 sg_size_t sent = sendMigrationData(ramsize, 1, 0, mig_speed, -1);
171 remaining_size -= sent;
// Value looked up right after the stage-1 transfer; it sizes the first stage-2 round.
172 computed_during_stage1 = sg_vm_lookup_computed_flops(vm_);
174 if (sent < ramsize) {
175 XBT_VERB("mig-stage1: timeout, force moving to stage 3");
177 } else if (sent > ramsize)
179 } catch (const Exception&) {
180 // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data code)
181 // Stop the dirty page tracking and return (there is no memory space to release)
182 sg_vm_stop_dirty_page_tracking(vm_);
186 clock_post_send = s4u::Engine::get_clock();
// Charge the time spent in stage 1 to the timeout budget.
187 mig_timeout -= (clock_post_send - clock_prev_send);
188 if (mig_timeout < 0) {
189 XBT_VERB("The duration of stage 1 exceeds the timeout value, skip stage 2");
193 /* estimate bandwidth */
// NOTE(review): `bandwidth` and `threshold` are assigned here, but their declarations are not shown in this
// excerpt. If the clock did not advance during stage 1, the divisor below is 0 -- confirm this cannot happen.
194 bandwidth = ramsize / (clock_post_send - clock_prev_send);
195 threshold = bandwidth * max_downtime;
196 XBT_DEBUG("actual bandwidth %f (MB/s), threshold %zu", bandwidth / 1024 / 1024, threshold);
198 /* Stage2: send update pages iteratively until the size of remaining states becomes smaller than threshold value. */
199 if (not skip_stage2) {
200 int stage2_round = 0;
201 /* just after stage1, nothing has been updated. But, we have to send the data updated during stage1 */
202 sg_size_t updated_size = get_updated_size(computed_during_stage1, dp_rate, dp_cap);
203 remaining_size += updated_size;
204 XBT_DEBUG("mig-stage2.%d: remaining_size %zu (%s threshold %zu)", stage2_round, remaining_size,
205 (remaining_size < threshold) ? "<" : ">", threshold);
207 /* When the remaining size is below the threshold value, move to stage 3. */
208 while (threshold < remaining_size) {
209 XBT_DEBUG("mig-stage 2:%d updated_size %llu computed_during_stage1 %f dp_rate %f dp_cap %llu", stage2_round,
210 updated_size, computed_during_stage1, dp_rate, dp_cap);
213 clock_prev_send = s4u::Engine::get_clock();
215 XBT_DEBUG("Stage 2, gonna send %llu", updated_size);
216 sent = sendMigrationData(updated_size, 2, stage2_round, mig_speed, mig_timeout);
217 } catch (const Exception&) {
218 // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data
220 // Stop the dirty page tracking and return (there is no memory space to release)
221 sg_vm_stop_dirty_page_tracking(vm_);
225 remaining_size -= sent;
// Value looked up right after this round's transfer; it sizes the next round.
226 double computed = sg_vm_lookup_computed_flops(vm_);
228 clock_post_send = s4u::Engine::get_clock();
// Complete round: refresh the bandwidth estimate and the threshold, charge the elapsed time to the
// budget (which is asserted to stay positive), then add the next amount to send.
230 if (sent == updated_size) {
231 bandwidth = updated_size / (clock_post_send - clock_prev_send);
232 threshold = bandwidth * max_downtime;
233 XBT_DEBUG("actual bandwidth %f, threshold %zu", bandwidth / 1024 / 1024, threshold);
235 mig_timeout -= (clock_post_send - clock_prev_send);
236 xbt_assert(mig_timeout > 0);
237 XBT_DEBUG("mig-stage2.%d: remaining_size %zu (%s threshold %zu)", stage2_round, remaining_size,
238 (remaining_size < threshold) ? "<" : ">", threshold);
239 updated_size = get_updated_size(computed, dp_rate, dp_cap);
240 remaining_size += updated_size;
242 /* When timeout happens, we move to stage 3. The size of memory pages
243 * updated before timeout must be added to the remaining size. */
244 XBT_VERB("mig-stage2.%d: timeout, force moving to stage 3. sent %llu / %llu, eta %lf", stage2_round, sent,
245 updated_size, (clock_post_send - clock_prev_send));
246 updated_size = get_updated_size(computed, dp_rate, dp_cap);
247 remaining_size += updated_size;
253 /* Stage3: stop the VM and copy the rest of states. */
254 XBT_DEBUG("mig-stage3: remaining_size %zu", remaining_size);
256 sg_vm_stop_dirty_page_tracking(vm_);
// As in stage 1, the timeout argument is -1; the returned amount is not used.
259 XBT_DEBUG("Stage 3: Gonna send %zu bytes", remaining_size);
260 sendMigrationData(remaining_size, 3, 0, mig_speed, -1);
261 } catch (const Exception&) {
262 // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data code)
263 // Stop the dirty page tracking and return (there is no memory space to release)
268 // At that point the Migration is considered valid for the SRC node but remind that the DST side should relocate
269 // effectively the VM on the DST node.
270 XBT_DEBUG("mig: tx_done");
273 } // namespace simgrid
275 static void onVirtualMachineShutdown(simgrid::s4u::VirtualMachine const& vm)
277 if (vm.get_impl()->is_migrating()) {
278 vm.extension<simgrid::vm::VmMigrationExt>()->rx_->kill();
279 vm.extension<simgrid::vm::VmMigrationExt>()->tx_->kill();
280 vm.extension<simgrid::vm::VmMigrationExt>()->issuer_->kill();
281 vm.get_impl()->end_migration();
285 void sg_vm_live_migration_plugin_init()
287 sg_vm_dirty_page_tracking_init();
288 simgrid::vm::VmMigrationExt::ensureVmMigrationExtInstalled();
289 simgrid::s4u::VirtualMachine::on_shutdown.connect(&onVirtualMachineShutdown);
292 simgrid::s4u::VirtualMachine* sg_vm_create_migratable(simgrid::s4u::Host* pm, const char* name, int coreAmount,
293 int ramsize, int mig_netspeed, int dp_intensity)
295 simgrid::vm::VmHostExt::ensureVmExtInstalled();
297 /* For the moment, intensity_rate is the percentage against the migration bandwidth */
299 auto* vm = new simgrid::s4u::VirtualMachine(name, pm, coreAmount, static_cast<sg_size_t>(ramsize) * 1024 * 1024);
300 sg_vm_set_dirty_page_intensity(vm, dp_intensity / 100.0);
301 sg_vm_set_working_set_memory(vm, vm->get_ramsize() * 0.9); // assume working set memory is 90% of ramsize
302 sg_vm_set_migration_speed(vm, mig_netspeed * 1024 * 1024.0);
304 XBT_DEBUG("migspeed : %f intensity mem : %d", mig_netspeed * 1024 * 1024.0, dp_intensity);
309 int sg_vm_is_migrating(const simgrid::s4u::VirtualMachine* vm)
311 return vm->get_impl()->is_migrating();
/**
 * Migrate @p vm from its current host to @p dst_pm and wait for the acknowledgement of the receiving side.
 *
 * Throws simgrid::VmFailureException if the source or destination host is off, if the VM is not in the
 * RUNNING state, or if the VM is already migrating. Otherwise the VM is marked as migrating, a MigrationRx
 * actor is created on the destination host and a MigrationTx actor on the source host, and the calling actor
 * waits for a message on the "__mbox_mig_ctl:<vm>(<src>-<dst>)" mailbox before ending the migration.
 *
 * @param vm     the VM to migrate
 * @param dst_pm the host to migrate it to
 */
314 void sg_vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm)
316 simgrid::s4u::Host* src_pm = vm->get_pm();
// Preconditions: each failed check throws a VmFailureException carrying an explanatory message.
318 if (not src_pm->is_on())
319 throw simgrid::VmFailureException(
320 XBT_THROW_POINT, simgrid::xbt::string_printf("Cannot migrate VM '%s' from host '%s', which is offline.",
321 vm->get_cname(), src_pm->get_cname()));
322 if (not dst_pm->is_on())
323 throw simgrid::VmFailureException(
324 XBT_THROW_POINT, simgrid::xbt::string_printf("Cannot migrate VM '%s' to host '%s', which is offline.",
325 vm->get_cname(), dst_pm->get_cname()));
326 if (vm->get_state() != simgrid::s4u::VirtualMachine::state::RUNNING)
327 throw simgrid::VmFailureException(
329 simgrid::xbt::string_printf("Cannot migrate VM '%s' that is not running yet.", vm->get_cname()));
330 if (vm->get_impl()->is_migrating())
331 throw simgrid::VmFailureException(
333 simgrid::xbt::string_printf("Cannot migrate VM '%s' that is already migrating.", vm->get_cname()));
// Mark the VM as migrating, then fire the on_migration_start signal.
335 vm->get_impl()->start_migration();
336 simgrid::s4u::VirtualMachine::on_migration_start(*vm);
// Actor names embed the VM name and the (source, destination) host pair.
338 std::string rx_name =
339 std::string("__pr_mig_rx:") + vm->get_cname() + "(" + src_pm->get_cname() + "-" + dst_pm->get_cname() + ")";
340 std::string tx_name =
341 std::string("__pr_mig_tx:") + vm->get_cname() + "(" + src_pm->get_cname() + "-" + dst_pm->get_cname() + ")";
// The receiver runs on the destination host, the sender on the source host.
343 simgrid::s4u::ActorPtr rx =
344 simgrid::s4u::Actor::create(rx_name.c_str(), dst_pm, simgrid::vm::MigrationRx(vm, dst_pm));
345 simgrid::s4u::ActorPtr tx =
346 simgrid::s4u::Actor::create(tx_name.c_str(), src_pm, simgrid::vm::MigrationTx(vm, dst_pm));
// Record the calling actor and both migration actors in the VM's extension; onVirtualMachineShutdown()
// kills the rx_, tx_ and issuer_ actors of that extension if the VM is shut down while migrating.
348 vm->extension_set<simgrid::vm::VmMigrationExt>(new simgrid::vm::VmMigrationExt(simgrid::s4u::Actor::self(), rx, tx));
350 /* wait until the migration has finished or an error has occurred */
351 XBT_DEBUG("wait for reception of the final ACK (i.e. migration has been correctly performed");
352 simgrid::s4u::Mailbox* mbox_ctl = simgrid::s4u::Mailbox::by_name(
353 std::string("__mbox_mig_ctl:") + vm->get_cname() + "(" + src_pm->get_cname() + "-" + dst_pm->get_cname() + ")");
// Blocks until a message arrives; its content is not inspected.
354 mbox_ctl->get_unique<std::string>();
// NOTE(review): MigrationRx::operator() already calls end_migration() before sending the acknowledgement,
// so this looks like a second call on the success path -- confirm that it is harmless.
358 vm->get_impl()->end_migration();
359 simgrid::s4u::VirtualMachine::on_migration_end(*vm);