Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Close #105
author Frederic Suter <frederic.suter@cc.in2p3.fr>
Mon, 22 Jan 2018 10:09:30 +0000 (11:09 +0100)
committer Frederic Suter <frederic.suter@cc.in2p3.fr>
Mon, 22 Jan 2018 10:09:30 +0000 (11:09 +0100)
When a VM is shut down, a signal is triggered, which is captured by the VM
live migration plugin. If the VM is currently migrating, the plugin kills the
RX and TX actors and the actor that was used to call sg_vm_migrate in
an asynchronous way. Smart pointers to these three actors are stored
in a new extension of the VM, which is created when a migration is started.

A test for this feature has been added in
teshsuite/s4u/cloud-interrupt-migration, which corresponds to the
scenario described in the issue.

include/simgrid/plugins/live_migration.h
include/simgrid/s4u/VirtualMachine.hpp
src/plugins/vm/VmLiveMigration.cpp
src/plugins/vm/VmLiveMigration.hpp
src/plugins/vm/s4u_VirtualMachine.cpp
src/surf/plugins/dirty_page_tracking.cpp
teshsuite/s4u/CMakeLists.txt
teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.cpp [new file with mode: 0644]
teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.tesh [new file with mode: 0644]

index 99665ba..7f32c4f 100644 (file)
@@ -13,6 +13,7 @@
 SG_BEGIN_DECL()
 
 XBT_PUBLIC(void) sg_vm_live_migration_plugin_init();
+XBT_PRIVATE void sg_vm_dirty_page_tracking_init();
 XBT_PUBLIC(void) sg_vm_start_dirty_page_tracking(sg_vm_t vm);
 XBT_PUBLIC(void) sg_vm_stop_dirty_page_tracking(sg_vm_t vm);
 XBT_PUBLIC(double) sg_vm_lookup_computed_flops(sg_vm_t vm);
index c4cca14..233be56 100644 (file)
@@ -60,6 +60,7 @@ public:
   void setBound(double bound);
 
   e_surf_vm_state_t getState();
+  static simgrid::xbt::signal<void(simgrid::s4u::VirtualMachine*)> onVmShutdown;
 };
 }
 } // namespace simgrid::s4u
index ba9cee4..bac67f3 100644 (file)
@@ -16,6 +16,13 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(vm_live_migration, "S4U virtual machines live migra
 
 namespace simgrid {
 namespace vm {
+simgrid::xbt::Extension<s4u::Host, VmMigrationExt> VmMigrationExt::EXTENSION_ID;
+
+void VmMigrationExt::ensureVmMigrationExtInstalled()
+{ // Creates the VmMigrationExt extension slot on s4u::Host unless EXTENSION_ID is already valid.
+  if (not EXTENSION_ID.valid()) // an already-valid id is left untouched, so repeated calls are harmless
+    EXTENSION_ID = simgrid::s4u::Host::extension_create<VmMigrationExt>();
+}
 
 void MigrationRx::operator()()
 {
@@ -111,6 +118,7 @@ sg_size_t MigrationTx::sendMigrationData(sg_size_t size, int stage, int stage2_r
       XBT_VERB("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu", timeout, remaining, size);
       sent -= remaining;
     }
+    delete msg;
   }
 
   double clock_end    = s4u::Engine::getClock();
@@ -281,6 +289,24 @@ void MigrationTx::operator()()
 }
 
 SG_BEGIN_DECL()
+
+static void onVirtualMachineShutdown(simgrid::s4u::VirtualMachine* vm)
+{ // Handler connected to VirtualMachine::onVmShutdown: if vm is migrating, kills the actors of that migration.
+  if (vm->isMigrating()) { // a VM that is not migrating needs no cleanup here
+    vm->extension<simgrid::vm::VmMigrationExt>()->rx_->kill(); // RX actor; NOTE(review): extension pointer is not null-checked
+    vm->extension<simgrid::vm::VmMigrationExt>()->tx_->kill(); // TX actor
+    vm->extension<simgrid::vm::VmMigrationExt>()->issuer_->kill(); // actor that called sg_vm_migrate()
+    vm->getImpl()->isMigrating = false; // reset the migrating flag held by the VM implementation
+  }
+}
+
+void sg_vm_live_migration_plugin_init()
+{ // Plugin entry point: initializes dirty page tracking, the migration extension and the shutdown hook.
+  sg_vm_dirty_page_tracking_init(); // declared XBT_PRIVATE in live_migration.h
+  simgrid::vm::VmMigrationExt::ensureVmMigrationExtInstalled(); // makes VmMigrationExt::EXTENSION_ID valid
+  simgrid::s4u::VirtualMachine::onVmShutdown.connect(&onVirtualMachineShutdown); // runs on every VirtualMachine::shutdown()
+}
+
 simgrid::s4u::VirtualMachine* sg_vm_create_migratable(simgrid::s4u::Host* pm, const char* name, int coreAmount,
                                                       int ramsize, int mig_netspeed, int dp_intensity)
 {
@@ -328,12 +354,13 @@ void sg_vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm)
   simgrid::s4u::ActorPtr tx =
       simgrid::s4u::Actor::createActor(tx_name.c_str(), src_pm, simgrid::vm::MigrationTx(vm, dst_pm));
 
+  vm->extension_set<simgrid::vm::VmMigrationExt>(new simgrid::vm::VmMigrationExt(simgrid::s4u::Actor::self(), rx, tx));
+
   /* wait until the migration have finished or on error has occurred */
   XBT_DEBUG("wait for reception of the final ACK (i.e. migration has been correctly performed");
   simgrid::s4u::MailboxPtr mbox_ctl = simgrid::s4u::Mailbox::byName(
       std::string("__mbox_mig_ctl:") + vm->getCname() + "(" + src_pm->getCname() + "-" + dst_pm->getCname() + ")");
   delete static_cast<std::string*>(mbox_ctl->get());
-
   tx->join();
   rx->join();
 
index 7df29b4..39cde37 100644 (file)
 
 namespace simgrid {
 namespace vm {
+class VmMigrationExt { // Extension attached to a VM by sg_vm_migrate(); holds the three actors involved in the migration.
+public:
+  s4u::ActorPtr issuer_ = nullptr; // actor that called sg_vm_migrate() (Actor::self() at that point)
+  s4u::ActorPtr tx_     = nullptr; // TX actor of the migration
+  s4u::ActorPtr rx_     = nullptr; // RX actor of the migration
+  static simgrid::xbt::Extension<simgrid::s4u::Host, VmMigrationExt> EXTENSION_ID; // set by ensureVmMigrationExtInstalled()
+  virtual ~VmMigrationExt() = default;
+  explicit VmMigrationExt(s4u::ActorPtr issuer, s4u::ActorPtr rx, s4u::ActorPtr tx) : issuer_(issuer), tx_(tx), rx_(rx)
+  { // beware: parameters are (issuer, rx, tx) while the members are declared, and initialized, as issuer_, tx_, rx_
+  }
+  static void ensureVmMigrationExtInstalled(); // creates EXTENSION_ID if it is not valid yet
+};
 
 class MigrationRx {
   /* The miration_rx process uses mbox_ctl to let the caller of do_migration()  know the completion of the migration. */
index 3ca1033..848098e 100644 (file)
@@ -13,6 +13,8 @@
 
 XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_vm, "S4U virtual machines");
 
+simgrid::xbt::signal<void(simgrid::s4u::VirtualMachine*)> simgrid::s4u::VirtualMachine::onVmShutdown;
+
 namespace simgrid {
 namespace s4u {
 
@@ -128,13 +130,11 @@ void VirtualMachine::shutdown()
 {
   smx_actor_t issuer = SIMIX_process_self();
   simgrid::simix::kernelImmediate([this, issuer]() { pimpl_vm_->shutdown(issuer); });
+  onVmShutdown(this);
 }
 
 void VirtualMachine::destroy()
 {
-  if (isMigrating())
-    THROWF(vm_error, 0, "Cannot destroy VM '%s', which is migrating.", getCname());
-
   /* First, terminate all processes on the VM if necessary */
   shutdown();
 
index 6c3710b..e5232cc 100644 (file)
@@ -104,7 +104,7 @@ static void onExecCompletion(simgrid::kernel::activity::ExecImplPtr exec)
 
 SG_BEGIN_DECL()
 
-void sg_vm_live_migration_plugin_init()
+void sg_vm_dirty_page_tracking_init()
 {
   if (not simgrid::vm::VmDirtyPageTrackingExt::EXTENSION_ID.valid()) {
     simgrid::vm::VmDirtyPageTrackingExt::EXTENSION_ID =
index e753c24..cdcb343 100644 (file)
@@ -1,4 +1,7 @@
-foreach(x actor comm-pt2pt concurrent_rw host_on_off_wait listen_async pid storage_client_server)
+foreach(x actor
+        comm-pt2pt
+        cloud-interrupt-migration
+        concurrent_rw storage_client_server host_on_off_wait listen_async pid )
   add_executable       (${x}  ${x}/${x}.cpp)
   target_link_libraries(${x}  simgrid)
   set_target_properties(${x}  PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x})
@@ -8,7 +11,7 @@ endforeach()
 
 ## Add the tests.
 ## Some need to be run with all factories, some need not tesh to run
-foreach(x actor concurrent_rw)
+foreach(x actor cloud-interrupt-migration concurrent_rw)
   set(tesh_files    ${tesh_files}    ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh)
   ADD_TESH_FACTORIES(tesh-s4u-${x} "thread;ucontext;raw;boost" --setenv srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} ${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x}/${x}.tesh)
 endforeach()
diff --git a/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.cpp b/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.cpp
new file mode 100644 (file)
index 0000000..afb79fb
--- /dev/null
@@ -0,0 +1,72 @@
+/* Copyright (c) 2017. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "simgrid/plugins/live_migration.h"
+#include "simgrid/s4u.hpp"
+#include "simgrid/s4u/VirtualMachine.hpp"
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_cloud_interrupt_migration, "Messages specific for this example");
+
+static void vm_migrate(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm)
+{ // Migrates vm to dst_pm with sg_vm_migrate() and logs the simulated time elapsed around that call.
+  simgrid::s4u::Host* src_pm = vm->getPm(); // read before migrating; presumably getPm() differs afterwards -- TODO confirm
+  double mig_sta             = simgrid::s4u::Engine::getClock();
+  sg_vm_migrate(vm, dst_pm); // if the VM is shut down meanwhile, the plugin kills this actor and the log below is never reached
+  double mig_end = simgrid::s4u::Engine::getClock();
+
+  XBT_INFO("%s migrated: %s->%s in %g s", vm->getCname(), src_pm->getCname(), dst_pm->getCname(), mig_end - mig_sta);
+}
+
+static simgrid::s4u::ActorPtr vm_migrate_async(simgrid::s4u::VirtualMachine* vm, simgrid::s4u::Host* dst_pm)
+{ // Creates a "mig_wrk" actor on the current host that runs vm_migrate(vm, dst_pm), and returns that actor.
+  return simgrid::s4u::Actor::createActor("mig_wrk", simgrid::s4u::Host::current(), vm_migrate, vm, dst_pm);
+}
+
+static void master_main()
+{ // Scenario: start migrating VM0, shut it down 2 s later, then restart it and migrate again without interruption.
+  simgrid::s4u::Host* pm0 = simgrid::s4u::Host::by_name("Fafard");
+  simgrid::s4u::Host* pm1 = simgrid::s4u::Host::by_name("Tremblay");
+
+  simgrid::s4u::VirtualMachine* vm0 = new simgrid::s4u::VirtualMachine("VM0", pm0, 1);
+  vm0->setRamsize(1e9); // 1Gbytes
+  vm0->start();
+
+  XBT_INFO("Start the migration of %s from %s to %s", vm0->getCname(), pm0->getCname(), pm1->getCname());
+  simgrid::s4u::ActorPtr migration = vm_migrate_async(vm0, pm1); // NOTE(review): `migration` is not used again in this function
+
+  simgrid::s4u::this_actor::sleep_for(2); // let the first migration run for 2 simulated seconds
+  XBT_INFO("Wait! change my mind, shutdown %s. This ends the migration", vm0->getCname());
+  vm0->shutdown(); // fires VirtualMachine::onVmShutdown, on which the plugin kills the migration actors
+
+  simgrid::s4u::this_actor::sleep_for(8); // idle until t=10 before the second attempt (see the expected output)
+
+  XBT_INFO("Start again the migration of %s from %s to %s", vm0->getCname(), pm0->getCname(), pm1->getCname());
+
+  vm0->start();
+  vm_migrate_async(vm0, pm1); // the returned ActorPtr is not kept
+
+  XBT_INFO("Wait for the completion of the migration this time");
+  simgrid::s4u::this_actor::sleep_for(200); // the expected output shows the migration ending at ~142.8, within this wait
+  vm0->destroy();
+}
+
+int main(int argc, char* argv[])
+{ // Test driver: loads the platform file given as argv[1] and runs master_main as actor "master_" on host Fafard.
+  /* Get the arguments */
+  simgrid::s4u::Engine e(&argc, argv);
+  sg_vm_live_migration_plugin_init(); // connects the VM shutdown handler that this test exercises
+
+  /* load the platform file */
+  e.loadPlatform(argv[1]); // NOTE(review): argc is not checked before argv[1] is read
+
+  simgrid::s4u::Actor::createActor("master_", simgrid::s4u::Host::by_name("Fafard"), master_main);
+
+  e.run();
+
+  XBT_INFO("Bye (simulation time %g)", simgrid::s4u::Engine::getClock());
+
+  return 0;
+}
diff --git a/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.tesh b/teshsuite/s4u/cloud-interrupt-migration/cloud-interrupt-migration.tesh
new file mode 100644 (file)
index 0000000..fe95a5b
--- /dev/null
@@ -0,0 +1,7 @@
+$ $SG_TEST_EXENV ${bindir:=.}/cloud-interrupt-migration ${platfdir}/small_platform.xml --log=no_loc "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n"
+> [  0.000000] (1:master_@Fafard) Start the migration of VM0 from Fafard to Tremblay
+> [  2.000000] (1:master_@Fafard) Wait! change my mind, shutdown VM0. This ends the migration
+> [ 10.000000] (1:master_@Fafard) Start again the migration of VM0 from Fafard to Tremblay
+> [ 10.000000] (1:master_@Fafard) Wait for the completion of the migration this time
+> [142.765801] (5:mig_wrk@Fafard) VM0 migrated: Fafard->Tremblay in 132.766 s
+> [210.000000] (0:maestro@) Bye (simulation time 210)