From: Frederic Suter Date: Mon, 22 Jan 2018 10:09:30 +0000 (+0100) Subject: Close #105 X-Git-Tag: v3.19~320 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/81e3c97faf69f3b26a807758efedde49e2a6abf6?hp=b2703f0d6d70062bc2074d9b60258356f23d18ba Close #105 When a VM is shut down, a signal is triggered which is captured in the VM live migration plugin. If the VM was currently migrating, it kills the RX and TX actors and the actor that was used to call sg_vm_migrate in an asynchronous way. Smart pointers on these three actors are stored in a new extension of the VM which is created when a migration is started. A test for this feature has been added in teshsuite/s4u/cloud-interrupt-migration which corresponds to the scenario described in the issue --- diff --git a/include/simgrid/plugins/live_migration.h b/include/simgrid/plugins/live_migration.h index 99665ba2ef..7f32c4fde9 100644 --- a/include/simgrid/plugins/live_migration.h +++ b/include/simgrid/plugins/live_migration.h @@ -13,6 +13,7 @@ SG_BEGIN_DECL() XBT_PUBLIC(void) sg_vm_live_migration_plugin_init(); +XBT_PRIVATE void sg_vm_dirty_page_tracking_init(); XBT_PUBLIC(void) sg_vm_start_dirty_page_tracking(sg_vm_t vm); XBT_PUBLIC(void) sg_vm_stop_dirty_page_tracking(sg_vm_t vm); XBT_PUBLIC(double) sg_vm_lookup_computed_flops(sg_vm_t vm); diff --git a/include/simgrid/s4u/VirtualMachine.hpp b/include/simgrid/s4u/VirtualMachine.hpp index c4cca14c27..233be568c6 100644 --- a/include/simgrid/s4u/VirtualMachine.hpp +++ b/include/simgrid/s4u/VirtualMachine.hpp @@ -60,6 +60,7 @@ public: void setBound(double bound); e_surf_vm_state_t getState(); + static simgrid::xbt::signal onVmShutdown; }; } } // namespace simgrid::s4u diff --git a/src/plugins/vm/VmLiveMigration.cpp b/src/plugins/vm/VmLiveMigration.cpp index ba9cee4d94..bac67f3dd8 100644 --- a/src/plugins/vm/VmLiveMigration.cpp +++ b/src/plugins/vm/VmLiveMigration.cpp @@ -16,6 +16,13 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(vm_live_migration, "S4U virtual 
machines live migra namespace simgrid { namespace vm { +simgrid::xbt::Extension VmMigrationExt::EXTENSION_ID; + +void VmMigrationExt::ensureVmMigrationExtInstalled() +{ + if (not EXTENSION_ID.valid()) + EXTENSION_ID = simgrid::s4u::Host::extension_create(); +} void MigrationRx::operator()() { @@ -111,6 +118,7 @@ sg_size_t MigrationTx::sendMigrationData(sg_size_t size, int stage, int stage2_r XBT_VERB("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu", timeout, remaining, size); sent -= remaining; } + delete msg; } double clock_end = s4u::Engine::getClock(); @@ -281,6 +289,24 @@ void MigrationTx::operator()() } SG_BEGIN_DECL() + +static void onVirtualMachineShutdown(simgrid::s4u::VirtualMachine* vm) +{ + if (vm->isMigrating()) { + vm->extension()->rx_->kill(); + vm->extension()->tx_->kill(); + vm->extension()->issuer_->kill(); + vm->getImpl()->isMigrating = false; + } +} + +void sg_vm_live_migration_plugin_init() +{ + sg_vm_dirty_page_tracking_init(); + simgrid::vm::VmMigrationExt::ensureVmMigrationExtInstalled(); + simgrid::s4u::VirtualMachine::onVmShutdown.connect(&onVirtualMachineShutdown); +} + simgrid::s4u::VirtualMachine* sg_vm_create_migratable(simgrid::s4u::Host* pm, const char* name, int coreAmount, int ramsize, int mig_netspeed, int dp_intensity) { @@ -328,12 +354,13 @@ void sg_vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm) simgrid::s4u::ActorPtr tx = simgrid::s4u::Actor::createActor(tx_name.c_str(), src_pm, simgrid::vm::MigrationTx(vm, dst_pm)); + vm->extension_set(new simgrid::vm::VmMigrationExt(simgrid::s4u::Actor::self(), rx, tx)); + /* wait until the migration have finished or on error has occurred */ XBT_DEBUG("wait for reception of the final ACK (i.e. 
migration has been correctly performed"); simgrid::s4u::MailboxPtr mbox_ctl = simgrid::s4u::Mailbox::byName( std::string("__mbox_mig_ctl:") + vm->getCname() + "(" + src_pm->getCname() + "-" + dst_pm->getCname() + ")"); delete static_cast(mbox_ctl->get()); - tx->join(); rx->join(); diff --git a/src/plugins/vm/VmLiveMigration.hpp b/src/plugins/vm/VmLiveMigration.hpp index 7df29b45bd..39cde37a6c 100644 --- a/src/plugins/vm/VmLiveMigration.hpp +++ b/src/plugins/vm/VmLiveMigration.hpp @@ -10,6 +10,18 @@ namespace simgrid { namespace vm { +class VmMigrationExt { +public: + s4u::ActorPtr issuer_ = nullptr; + s4u::ActorPtr tx_ = nullptr; + s4u::ActorPtr rx_ = nullptr; + static simgrid::xbt::Extension EXTENSION_ID; + virtual ~VmMigrationExt() = default; + explicit VmMigrationExt(s4u::ActorPtr issuer, s4u::ActorPtr rx, s4u::ActorPtr tx) : issuer_(issuer), tx_(tx), rx_(rx) + { + } + static void ensureVmMigrationExtInstalled(); +}; class MigrationRx { /* The miration_rx process uses mbox_ctl to let the caller of do_migration() know the completion of the migration. 
*/ diff --git a/src/plugins/vm/s4u_VirtualMachine.cpp b/src/plugins/vm/s4u_VirtualMachine.cpp index 3ca10336ca..848098e285 100644 --- a/src/plugins/vm/s4u_VirtualMachine.cpp +++ b/src/plugins/vm/s4u_VirtualMachine.cpp @@ -13,6 +13,8 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_vm, "S4U virtual machines"); +simgrid::xbt::signal simgrid::s4u::VirtualMachine::onVmShutdown; + namespace simgrid { namespace s4u { @@ -128,13 +130,11 @@ void VirtualMachine::shutdown() { smx_actor_t issuer = SIMIX_process_self(); simgrid::simix::kernelImmediate([this, issuer]() { pimpl_vm_->shutdown(issuer); }); + onVmShutdown(this); } void VirtualMachine::destroy() { - if (isMigrating()) - THROWF(vm_error, 0, "Cannot destroy VM '%s', which is migrating.", getCname()); - /* First, terminate all processes on the VM if necessary */ shutdown(); diff --git a/src/surf/plugins/dirty_page_tracking.cpp b/src/surf/plugins/dirty_page_tracking.cpp index 6c3710bc77..e5232cc8bd 100644 --- a/src/surf/plugins/dirty_page_tracking.cpp +++ b/src/surf/plugins/dirty_page_tracking.cpp @@ -104,7 +104,7 @@ static void onExecCompletion(simgrid::kernel::activity::ExecImplPtr exec) SG_BEGIN_DECL() -void sg_vm_live_migration_plugin_init() +void sg_vm_dirty_page_tracking_init() { if (not simgrid::vm::VmDirtyPageTrackingExt::EXTENSION_ID.valid()) { simgrid::vm::VmDirtyPageTrackingExt::EXTENSION_ID = diff --git a/teshsuite/s4u/CMakeLists.txt b/teshsuite/s4u/CMakeLists.txt index e753c24b2c..cdcb343cd9 100644 --- a/teshsuite/s4u/CMakeLists.txt +++ b/teshsuite/s4u/CMakeLists.txt @@ -1,4 +1,7 @@ -foreach(x actor comm-pt2pt concurrent_rw host_on_off_wait listen_async pid storage_client_server) +foreach(x actor + comm-pt2pt + cloud-interrupt-migration + concurrent_rw storage_client_server host_on_off_wait listen_async pid ) add_executable (${x} ${x}/${x}.cpp) target_link_libraries(${x} simgrid) set_target_properties(${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x}) @@ -8,7 +11,7 @@ endforeach() ## Add the 
tests. ## Some need to be run with all factories, some need not tesh to run -foreach(x actor concurrent_rw) +foreach(x actor cloud-interrupt-migration concurrent_rw) set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh) ADD_TESH_FACTORIES(tesh-s4u-${x} "thread;ucontext;raw;boost" --setenv srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} ${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x}/${x}.tesh) endforeach() diff --git a/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.cpp b/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.cpp new file mode 100644 index 0000000000..afb79fbafb --- /dev/null +++ b/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.cpp @@ -0,0 +1,72 @@ +/* Copyright (c) 2017. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. 
*/ + +#include "simgrid/plugins/live_migration.h" +#include "simgrid/s4u.hpp" +#include "simgrid/s4u/VirtualMachine.hpp" + +XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_cloud_interrupt_migration, "Messages specific for this example"); + +static void vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm) +{ + simgrid::s4u::Host* src_pm = vm->getPm(); + double mig_sta = simgrid::s4u::Engine::getClock(); + sg_vm_migrate(vm, dst_pm); + double mig_end = simgrid::s4u::Engine::getClock(); + + XBT_INFO("%s migrated: %s->%s in %g s", vm->getCname(), src_pm->getCname(), dst_pm->getCname(), mig_end - mig_sta); +} + +static simgrid::s4u::ActorPtr vm_migrate_async(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm) +{ + return simgrid::s4u::Actor::createActor("mig_wrk", simgrid::s4u::Host::current(), vm_migrate, vm, dst_pm); +} + +static void master_main() +{ + simgrid::s4u::Host* pm0 = simgrid::s4u::Host::by_name("Fafard"); + simgrid::s4u::Host* pm1 = simgrid::s4u::Host::by_name("Tremblay"); + + simgrid::s4u::VirtualMachine* vm0 = new simgrid::s4u::VirtualMachine("VM0", pm0, 1); + vm0->setRamsize(1e9); // 1Gbytes + vm0->start(); + + XBT_INFO("Start the migration of %s from %s to %s", vm0->getCname(), pm0->getCname(), pm1->getCname()); + simgrid::s4u::ActorPtr migration = vm_migrate_async(vm0, pm1); + + simgrid::s4u::this_actor::sleep_for(2); + XBT_INFO("Wait! change my mind, shutdown %s. 
This ends the migration", vm0->getCname()); + vm0->shutdown(); + + simgrid::s4u::this_actor::sleep_for(8); + + XBT_INFO("Start again the migration of %s from %s to %s", vm0->getCname(), pm0->getCname(), pm1->getCname()); + + vm0->start(); + vm_migrate_async(vm0, pm1); + + XBT_INFO("Wait for the completion of the migration this time"); + simgrid::s4u::this_actor::sleep_for(200); + vm0->destroy(); +} + +int main(int argc, char* argv[]) +{ + /* Get the arguments */ + simgrid::s4u::Engine e(&argc, argv); + sg_vm_live_migration_plugin_init(); + + /* load the platform file */ + e.loadPlatform(argv[1]); + + simgrid::s4u::Actor::createActor("master_", simgrid::s4u::Host::by_name("Fafard"), master_main); + + e.run(); + + XBT_INFO("Bye (simulation time %g)", simgrid::s4u::Engine::getClock()); + + return 0; +} diff --git a/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.tesh b/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.tesh new file mode 100644 index 0000000000..fe95a5b023 --- /dev/null +++ b/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.tesh @@ -0,0 +1,7 @@ +$ $SG_TEST_EXENV ${bindir:=.}/cloud-interrupt-migration ${platfdir}/small_platform.xml --log=no_loc "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n" +> [ 0.000000] (1:master_@Fafard) Start the migration of VM0 from Fafard to Tremblay +> [ 2.000000] (1:master_@Fafard) Wait! change my mind, shutdown VM0. This ends the migration +> [ 10.000000] (1:master_@Fafard) Start again the migration of VM0 from Fafard to Tremblay +> [ 10.000000] (1:master_@Fafard) Wait for the completion of the migration this time +> [142.765801] (5:mig_wrk@Fafard) VM0 migrated: Fafard->Tremblay in 132.766 s +> [210.000000] (0:maestro@) Bye (simulation time 210)