X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/4d648ebbbe5705878080b9cbf1ca61497323c592..4c753f8d4cabd4104f3f7109823f16be2ebdcce3:/src/plugins/vm/VmLiveMigration.cpp diff --git a/src/plugins/vm/VmLiveMigration.cpp b/src/plugins/vm/VmLiveMigration.cpp index 10a8a1973b..75f3a02228 100644 --- a/src/plugins/vm/VmLiveMigration.cpp +++ b/src/plugins/vm/VmLiveMigration.cpp @@ -1,24 +1,24 @@ -/* Copyright (c) 2013-2019. The SimGrid Team. All rights reserved. */ +/* Copyright (c) 2013-2022. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ -#include "src/plugins/vm/VmLiveMigration.hpp" -#include "simgrid/Exception.hpp" +#include +#include + #include "src/instr/instr_private.hpp" -#include "src/plugins/vm/VirtualMachineImpl.hpp" -#include "src/plugins/vm/VmHostExt.hpp" +#include "src/kernel/resource/VirtualMachineImpl.hpp" +#include "src/plugins/vm/VmLiveMigration.hpp" -XBT_LOG_NEW_DEFAULT_CATEGORY(vm_live_migration, "S4U virtual machines live migration"); +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(vm_live_migration, s4u, "S4U virtual machines live migration"); -namespace simgrid { -namespace vm { -simgrid::xbt::Extension VmMigrationExt::EXTENSION_ID; +namespace simgrid::plugin::vm { +xbt::Extension VmMigrationExt::EXTENSION_ID; void VmMigrationExt::ensureVmMigrationExtInstalled() { if (not EXTENSION_ID.valid()) - EXTENSION_ID = simgrid::s4u::Host::extension_create(); + EXTENSION_ID = s4u::Host::extension_create(); } void MigrationRx::operator()() @@ -27,15 +27,13 @@ void MigrationRx::operator()() bool received_finalize = false; std::string finalize_task_name = - std::string("__mig_stage3:") + vm_->get_cname() + "(" + src_pm_->get_cname() + "-" + dst_pm_->get_cname() + ")"; + "__mig_stage3:" + vm_->get_name() + "(" + src_pm_->get_name() + "-" + dst_pm_->get_name() + ")"; while (not received_finalize) { - std::string* payload = static_cast(mbox->get()); + auto payload = mbox->get_unique(); if (finalize_task_name == *payload) received_finalize = true; - - delete payload; } // Here Stage 1, 2 and 3 have been performed. @@ -43,14 +41,14 @@ void MigrationRx::operator()() /* Update the vm location */ /* precopy migration makes the VM temporally paused */ - xbt_assert(vm_->get_state() == s4u::VirtualMachine::state::SUSPENDED); + xbt_assert(vm_->get_state() == s4u::VirtualMachine::State::SUSPENDED); /* Update the vm location and resume it */ vm_->set_pm(dst_pm_); vm_->resume(); // Now the VM is running on the new host (the migration is completed) (even if the SRC crash) - vm_->get_impl()->is_migrating_ = false; + vm_->get_vm_impl()->end_migration(); XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", vm_->get_cname(), src_pm_->get_cname(), dst_pm_->get_cname()); if (TRACE_vm_is_enabled()) { @@ -59,22 +57,22 @@ void MigrationRx::operator()() counter++; // start link - container_t msg = simgrid::instr::Container::by_name(vm_->get_name()); - simgrid::instr::Container::get_root()->get_link("VM_LINK")->start_event(msg, "M", key); + auto* msg = instr::Container::by_name(vm_->get_name()); + instr::Container::get_root()->get_link("VM_LINK")->start_event(msg, "M", key); // destroy existing container of this vm - simgrid::instr::Container::by_name(vm_->get_name())->remove_from_parent(); + instr::Container::by_name(vm_->get_name())->remove_from_parent(); // create new container on the new_host location - new simgrid::instr::Container(vm_->get_name(), "VM", simgrid::instr::Container::by_name(dst_pm_->get_name())); + new instr::Container(vm_->get_name(), "VM", instr::Container::by_name(dst_pm_->get_name())); // end link - msg = simgrid::instr::Container::by_name(vm_->get_name()); - simgrid::instr::Container::get_root()->get_link("VM_LINK")->end_event(msg, "M", key); + msg = instr::Container::by_name(vm_->get_name()); + instr::Container::get_root()->get_link("VM_LINK")->end_event(msg, "M", key); } // Inform the SRC that the migration has been correctly performed - std::string* payload = new std::string("__mig_stage4:"); - *payload = *payload + vm_->get_cname() + "(" + src_pm_->get_cname() + "-" + dst_pm_->get_cname() + ")"; + auto* payload = new std::string("__mig_stage4:"); + *payload = *payload + vm_->get_name() + "(" + src_pm_->get_name() + "-" + dst_pm_->get_name() + ")"; mbox_ctl->put(payload, 0); @@ -83,7 +81,7 @@ void MigrationRx::operator()() static sg_size_t get_updated_size(double computed, double dp_rate, sg_size_t dp_cap) { - sg_size_t updated_size = static_cast(computed * dp_rate); + auto updated_size = static_cast(computed * dp_rate); XBT_DEBUG("updated_size %llu dp_rate %f", updated_size, dp_rate); if (updated_size > dp_cap) { updated_size = dp_cap; @@ -95,30 +93,25 @@ static sg_size_t get_updated_size(double computed, double dp_rate, sg_size_t dp_ sg_size_t MigrationTx::sendMigrationData(sg_size_t size, int stage, int stage2_round, double mig_speed, double timeout) { sg_size_t sent = size; - std::string* msg = new std::string("__mig_stage"); - *msg = *msg + std::to_string(stage) + ":" + vm_->get_cname() + "(" + src_pm_->get_cname() + "-" + - dst_pm_->get_cname() + ")"; - + auto msg = std::make_unique(xbt::string_printf("__mig_stage%d:%s(%s-%s)", stage, vm_->get_cname(), + src_pm_->get_cname(), dst_pm_->get_cname())); double clock_sta = s4u::Engine::get_clock(); - s4u::Activity* comm = nullptr; + s4u::CommPtr comm = mbox->put_init(msg.get(), size); + if (mig_speed > 0) + comm->set_rate(mig_speed); try { - if (mig_speed > 0) - comm = mbox->put_init(msg, size)->set_rate(mig_speed)->wait_for(timeout); - else - comm = mbox->put_async(msg, size)->wait_for(timeout); - } catch (xbt_ex& e) { - if (comm) { - sg_size_t remaining = static_cast(comm->get_remaining()); - XBT_VERB("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu", timeout, remaining, size); - sent -= remaining; - } - delete msg; + comm->wait_for(timeout); + msg.release(); + } catch (const Exception&) { + auto remaining = static_cast(comm->get_remaining()); + XBT_VERB("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu", timeout, remaining, size); + sent -= remaining; } double clock_end = s4u::Engine::get_clock(); double duration = clock_end - clock_sta; - double actual_speed = size / duration; + double actual_speed = static_cast(size) / duration; if (stage == 2) XBT_DEBUG("mig-stage%d.%d: sent %llu duration %f actual_speed %f (target %f)", stage, stage2_round, size, duration, @@ -137,7 +130,7 @@ void MigrationTx::operator()() double host_speed = vm_->get_pm()->get_speed(); const sg_size_t ramsize = vm_->get_ramsize(); const double dp_rate = - host_speed ? (sg_vm_get_migration_speed(vm_) * sg_vm_get_dirty_page_intensity(vm_)) / host_speed : 1; + host_speed != 0.0 ? (sg_vm_get_migration_speed(vm_) * sg_vm_get_dirty_page_intensity(vm_)) / host_speed : 1; const sg_size_t dp_cap = sg_vm_get_working_set_memory(vm_); const double mig_speed = sg_vm_get_migration_speed(vm_); double max_downtime = sg_vm_get_max_downtime(vm_); @@ -181,10 +174,9 @@ void MigrationTx::operator()() skip_stage2 = true; } else if (sent > ramsize) XBT_CRITICAL("bug"); - - } catch (xbt_ex& e) { + } catch (const Exception&) { // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data code) - // Stop the dirty page tracking an return (there is no memory space to release) + // Stop the dirty page tracking and return (there is no memory space to release) sg_vm_stop_dirty_page_tracking(vm_); return; } @@ -203,65 +195,56 @@ void MigrationTx::operator()() /* Stage2: send update pages iteratively until the size of remaining states becomes smaller than threshold value. */ if (not skip_stage2) { - int stage2_round = 0; - for (;;) { - sg_size_t updated_size = 0; - if (stage2_round == 0) { - /* just after stage1, nothing has been updated. But, we have to send the data updated during stage1 */ - updated_size = get_updated_size(computed_during_stage1, dp_rate, dp_cap); - } else { - double computed = sg_vm_lookup_computed_flops(vm_); - updated_size = get_updated_size(computed, dp_rate, dp_cap); - } - + /* just after stage1, nothing has been updated. But, we have to send the data updated during stage1 */ + sg_size_t updated_size = get_updated_size(computed_during_stage1, dp_rate, dp_cap); + remaining_size += updated_size; + XBT_DEBUG("mig-stage2.%d: remaining_size %zu (%s threshold %zu)", stage2_round, remaining_size, + (remaining_size < threshold) ? "<" : ">", threshold); + + /* When the remaining size is below the threshold value, move to stage 3. */ + while (threshold < remaining_size) { XBT_DEBUG("mig-stage 2:%d updated_size %llu computed_during_stage1 %f dp_rate %f dp_cap %llu", stage2_round, updated_size, computed_during_stage1, dp_rate, dp_cap); - /* Check whether the remaining size is below the threshold value. If so, move to stage 3. */ - remaining_size += updated_size; - XBT_DEBUG("mig-stage2.%d: remaining_size %zu (%s threshold %zu)", stage2_round, remaining_size, - (remaining_size < threshold) ? "<" : ">", threshold); - if (remaining_size < threshold) - break; - sg_size_t sent = 0; clock_prev_send = s4u::Engine::get_clock(); try { XBT_DEBUG("Stage 2, gonna send %llu", updated_size); sent = sendMigrationData(updated_size, 2, stage2_round, mig_speed, mig_timeout); - } catch (xbt_ex& e) { + } catch (const Exception&) { // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data // code) - // Stop the dirty page tracking an return (there is no memory space to release) + // Stop the dirty page tracking and return (there is no memory space to release) sg_vm_stop_dirty_page_tracking(vm_); return; } + + remaining_size -= sent; + double computed = sg_vm_lookup_computed_flops(vm_); + clock_post_send = s4u::Engine::get_clock(); if (sent == updated_size) { - /* timeout did not happen */ bandwidth = updated_size / (clock_post_send - clock_prev_send); threshold = bandwidth * max_downtime; XBT_DEBUG("actual bandwidth %f, threshold %zu", bandwidth / 1024 / 1024, threshold); - remaining_size -= sent; stage2_round += 1; mig_timeout -= (clock_post_send - clock_prev_send); xbt_assert(mig_timeout > 0); - - } else if (sent < updated_size) { + XBT_DEBUG("mig-stage2.%d: remaining_size %zu (%s threshold %zu)", stage2_round, remaining_size, + (remaining_size < threshold) ? "<" : ">", threshold); + updated_size = get_updated_size(computed, dp_rate, dp_cap); + remaining_size += updated_size; + } else { /* When timeout happens, we move to stage 3. The size of memory pages * updated before timeout must be added to the remaining size. */ XBT_VERB("mig-stage2.%d: timeout, force moving to stage 3. sent %llu / %llu, eta %lf", stage2_round, sent, updated_size, (clock_post_send - clock_prev_send)); - remaining_size -= sent; - - double computed = sg_vm_lookup_computed_flops(vm_); updated_size = get_updated_size(computed, dp_rate, dp_cap); remaining_size += updated_size; break; - } else - XBT_CRITICAL("bug"); + } } } @@ -273,9 +256,9 @@ void MigrationTx::operator()() try { XBT_DEBUG("Stage 3: Gonna send %zu bytes", remaining_size); sendMigrationData(remaining_size, 3, 0, mig_speed, -1); - } catch (xbt_ex& e) { + } catch (const Exception&) { // hostfailure (if you want to know whether this is the SRC or the DST check directly in send_migration_data code) - // Stop the dirty page tracking an return (there is no memory space to release) + // Stop the dirty page tracking and return (there is no memory space to release) vm_->resume(); return; } @@ -284,34 +267,35 @@ void MigrationTx::operator()() // effectively the VM on the DST node. XBT_DEBUG("mig: tx_done"); } -} -} +} // namespace simgrid::plugin::vm + +using simgrid::plugin::vm::VmMigrationExt; static void onVirtualMachineShutdown(simgrid::s4u::VirtualMachine const& vm) { - if (vm.get_impl()->is_migrating_) { - vm.extension()->rx_->kill(); - vm.extension()->tx_->kill(); - vm.extension()->issuer_->kill(); - vm.get_impl()->is_migrating_ = false; + if (vm.get_vm_impl()->is_migrating()) { + vm.extension()->rx_->kill(); + vm.extension()->tx_->kill(); + vm.extension()->issuer_->kill(); + vm.get_vm_impl()->end_migration(); } } void sg_vm_live_migration_plugin_init() { sg_vm_dirty_page_tracking_init(); - simgrid::vm::VmMigrationExt::ensureVmMigrationExtInstalled(); - simgrid::s4u::VirtualMachine::on_shutdown.connect(&onVirtualMachineShutdown); + VmMigrationExt::ensureVmMigrationExtInstalled(); + simgrid::s4u::VirtualMachine::on_shutdown_cb(&onVirtualMachineShutdown); } simgrid::s4u::VirtualMachine* sg_vm_create_migratable(simgrid::s4u::Host* pm, const char* name, int coreAmount, int ramsize, int mig_netspeed, int dp_intensity) { - simgrid::vm::VmHostExt::ensureVmExtInstalled(); + simgrid::s4u::VmHostExt::ensureVmExtInstalled(); /* For the moment, intensity_rate is the percentage against the migration bandwidth */ - sg_vm_t vm = new simgrid::s4u::VirtualMachine(name, pm, coreAmount, static_cast(ramsize) * 1024 * 1024); + auto* vm = pm->create_vm(name, coreAmount, static_cast(ramsize) * 1024 * 1024); sg_vm_set_dirty_page_intensity(vm, dp_intensity / 100.0); sg_vm_set_working_set_memory(vm, vm->get_ramsize() * 0.9); // assume working set memory is 90% of ramsize sg_vm_set_migration_speed(vm, mig_netspeed * 1024 * 1024.0); @@ -321,9 +305,9 @@ simgrid::s4u::VirtualMachine* sg_vm_create_migratable(simgrid::s4u::Host* pm, co return vm; } -int sg_vm_is_migrating(simgrid::s4u::VirtualMachine* vm) +int sg_vm_is_migrating(const simgrid::s4u::VirtualMachine* vm) { - return vm->get_impl()->is_migrating_; + return vm->get_vm_impl()->is_migrating(); } void sg_vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm) @@ -331,38 +315,41 @@ void sg_vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm) simgrid::s4u::Host* src_pm = vm->get_pm(); if (not src_pm->is_on()) - THROWF(vm_error, 0, "Cannot migrate VM '%s' from host '%s', which is offline.", vm->get_cname(), - src_pm->get_cname()); + throw simgrid::VmFailureException( + XBT_THROW_POINT, simgrid::xbt::string_printf("Cannot migrate VM '%s' from host '%s', which is offline.", + vm->get_cname(), src_pm->get_cname())); if (not dst_pm->is_on()) - THROWF(vm_error, 0, "Cannot migrate VM '%s' to host '%s', which is offline.", vm->get_cname(), dst_pm->get_cname()); - if (vm->get_state() != simgrid::s4u::VirtualMachine::state::RUNNING) - THROWF(vm_error, 0, "Cannot migrate VM '%s' that is not running yet.", vm->get_cname()); - if (vm->get_impl()->is_migrating_) - THROWF(vm_error, 0, "Cannot migrate VM '%s' that is already migrating.", vm->get_cname()); - - vm->get_impl()->is_migrating_ = true; - simgrid::s4u::VirtualMachine::on_migration_start(*vm); - - std::string rx_name = - std::string("__pr_mig_rx:") + vm->get_cname() + "(" + src_pm->get_cname() + "-" + dst_pm->get_cname() + ")"; - std::string tx_name = - std::string("__pr_mig_tx:") + vm->get_cname() + "(" + src_pm->get_cname() + "-" + dst_pm->get_cname() + ")"; + throw simgrid::VmFailureException( + XBT_THROW_POINT, simgrid::xbt::string_printf("Cannot migrate VM '%s' to host '%s', which is offline.", + vm->get_cname(), dst_pm->get_cname())); + if (vm->get_state() != simgrid::s4u::VirtualMachine::State::RUNNING) + throw simgrid::VmFailureException( + XBT_THROW_POINT, + simgrid::xbt::string_printf("Cannot migrate VM '%s' that is not running yet.", vm->get_cname())); + if (vm->get_vm_impl()->is_migrating()) + throw simgrid::VmFailureException( + XBT_THROW_POINT, + simgrid::xbt::string_printf("Cannot migrate VM '%s' that is already migrating.", vm->get_cname())); + + vm->start_migration(); + + std::string rx_name = "__pr_mig_rx:" + vm->get_name() + "(" + src_pm->get_name() + "-" + dst_pm->get_name() + ")"; + std::string tx_name = "__pr_mig_tx:" + vm->get_name() + "(" + src_pm->get_name() + "-" + dst_pm->get_name() + ")"; simgrid::s4u::ActorPtr rx = - simgrid::s4u::Actor::create(rx_name.c_str(), dst_pm, simgrid::vm::MigrationRx(vm, dst_pm)); + simgrid::s4u::Actor::create(rx_name.c_str(), dst_pm, simgrid::plugin::vm::MigrationRx(vm, dst_pm)); simgrid::s4u::ActorPtr tx = - simgrid::s4u::Actor::create(tx_name.c_str(), src_pm, simgrid::vm::MigrationTx(vm, dst_pm)); + simgrid::s4u::Actor::create(tx_name.c_str(), src_pm, simgrid::plugin::vm::MigrationTx(vm, dst_pm)); - vm->extension_set(new simgrid::vm::VmMigrationExt(simgrid::s4u::Actor::self(), rx, tx)); + vm->extension_set(new VmMigrationExt(simgrid::s4u::Actor::self(), rx, tx)); /* wait until the migration have finished or on error has occurred */ XBT_DEBUG("wait for reception of the final ACK (i.e. migration has been correctly performed"); - simgrid::s4u::Mailbox* mbox_ctl = simgrid::s4u::Mailbox::by_name( - std::string("__mbox_mig_ctl:") + vm->get_cname() + "(" + src_pm->get_cname() + "-" + dst_pm->get_cname() + ")"); - delete static_cast(mbox_ctl->get()); + simgrid::s4u::Mailbox* mbox_ctl = simgrid::s4u::Mailbox::by_name("__mbox_mig_ctl:" + vm->get_name() + "(" + + src_pm->get_name() + "-" + dst_pm->get_name() + ")"); + mbox_ctl->get_unique(); tx->join(); rx->join(); - vm->get_impl()->is_migrating_ = false; - simgrid::s4u::VirtualMachine::on_migration_end(*vm); + vm->end_migration(); }