From: Martin Quinson Date: Wed, 16 Nov 2016 21:49:18 +0000 (+0100) Subject: move the VM-related data out of MSG's private data for hosts X-Git-Tag: v3_14~190 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/128ea0e762c5eb7cd5c5a4be1cb7f745592be979?hp=19df82877ce519ee7ce4b9daee206ad6939f4710 move the VM-related data out of MSG's private data for hosts This is the beginning of turning the VMs into a separate plugin. I'm sure that reconcentrating all VM code into a few files will greatly simplify it. This change is really messy because I did not want to be too intrusive in msg_vm.c and had to expose much much of the VM internals for that. It will be improved in the future (it cannot get any worse). This reveals a little bug where the dirtyPages things of a VM that runs on a host which turns down are not properly cleaned up. There is nothing I can do right now as I don't have a onHostOff event or something, so I simply spit a warning in this case. For now. --- diff --git a/include/simgrid/s4u/VirtualMachine.hpp b/include/simgrid/s4u/VirtualMachine.hpp index de7123b313..b6863b327b 100644 --- a/include/simgrid/s4u/VirtualMachine.hpp +++ b/include/simgrid/s4u/VirtualMachine.hpp @@ -36,6 +36,8 @@ private: virtual ~VirtualMachine(); public: + bool isMigrating(); + void parameters(vm_params_t params); void setParameters(vm_params_t params); }; diff --git a/src/msg/msg_gos.cpp b/src/msg/msg_gos.cpp index 6c2ea24a9c..74d22e2a3f 100644 --- a/src/msg/msg_gos.cpp +++ b/src/msg/msg_gos.cpp @@ -29,7 +29,6 @@ msg_error_t MSG_task_execute(msg_task_t task) msg_error_t ret = MSG_parallel_task_execute(task); MSG_host_del_task(host, task); - return ret; } diff --git a/src/msg/msg_host.cpp b/src/msg/msg_host.cpp index 28bd9bbf3b..6ae7ac51f1 100644 --- a/src/msg/msg_host.cpp +++ b/src/msg/msg_host.cpp @@ -29,11 +29,6 @@ msg_host_t __MSG_host_create(sg_host_t host) // FIXME: don't return our paramete { msg_host_priv_t priv = xbt_new0(s_msg_host_priv_t, 1); - priv->dp_objs = nullptr; - priv->dp_enabled = 0; - priv->dp_updated_by_deleted_tasks = 0; - priv->is_migrating = 0; - priv->file_descriptor_table = nullptr; sg_host_msg_set(host,priv); @@ -114,10 +109,6 @@ void __MSG_host_priv_free(msg_host_priv_t priv) { if (priv == nullptr) return; - unsigned int size = priv->dp_objs ? xbt_dict_size(priv->dp_objs) : 0; - if (size > 0) - XBT_WARN("dp_objs: %u pending task?", size); - xbt_dict_free(&priv->dp_objs); delete priv->file_descriptor_table; free(priv); } diff --git a/src/msg/msg_private.h b/src/msg/msg_private.h index 6bca0ba3e2..4cb4b3e11e 100644 --- a/src/msg/msg_private.h +++ b/src/msg/msg_private.h @@ -29,10 +29,6 @@ SG_BEGIN_DECL() /**************** datatypes **********************************/ /********************************* Host **************************************/ typedef struct s_msg_host_priv { - int dp_enabled; - xbt_dict_t dp_objs; - double dp_updated_by_deleted_tasks; - int is_migrating; std::vector *file_descriptor_table; } s_msg_host_priv_t; diff --git a/src/msg/msg_vm.cpp b/src/msg/msg_vm.cpp index 21ec878a74..8c5a0803cb 100644 --- a/src/msg/msg_vm.cpp +++ b/src/msg/msg_vm.cpp @@ -11,6 +11,7 @@ #include +#include "src/surf/VirtualMachineImpl.hpp" #include #include @@ -133,8 +134,7 @@ int MSG_vm_is_running(msg_vm_t vm) */ int MSG_vm_is_migrating(msg_vm_t vm) { - msg_host_priv_t priv = sg_host_msg(vm); - return priv->is_migrating; + return static_cast(vm)->isMigrating(); } /** @brief Returns whether the given VM is currently suspended, not running. @@ -388,25 +388,25 @@ static int migration_rx_fun(int argc, char *argv[]) // Copy the reference to the vm (if SRC crashes now, do_migration will free ms) // This is clearly ugly but I (Adrien) need more time to do something cleaner (actually we should copy the whole ms // structure at the beginning and free it at the end of each function) - msg_vm_t vm = ms->vm; - msg_host_t src_pm = ms->src_pm; - msg_host_t dst_pm = ms-> dst_pm; - -// TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an inconsistent -// state. I should check with Takahiro in order to make this portion of code atomic -// -// /* Update the vm location */ -// simcall_vm_migrate(vm, dst_pm); -// -// /* Resume the VM */ -// simcall_vm_resume(vm); -// - simcall_vm_migratefrom_resumeto(vm, src_pm, dst_pm); + simgrid::s4u::VirtualMachine* vm = static_cast(ms->vm); + msg_host_t src_pm = ms->src_pm; + msg_host_t dst_pm = ms->dst_pm; + + // TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an + // inconsistent + // state. I should check with Takahiro in order to make this portion of code atomic + // + // /* Update the vm location */ + // simcall_vm_migrate(vm, dst_pm); + // + // /* Resume the VM */ + // simcall_vm_resume(vm); + // + simcall_vm_migratefrom_resumeto(vm, src_pm, dst_pm); { // Now the VM is running on the new host (the migration is completed) (even if the SRC crash) - msg_host_priv_t priv = sg_host_msg(vm); - priv->is_migrating = 0; + static_cast(vm->pimpl_)->isMigrating = false; XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", sg_host_get_name(ms->vm), sg_host_get_name(ms->src_pm), sg_host_get_name(ms->dst_pm)); TRACE_msg_vm_change_host(ms->vm, ms->src_pm, ms->dst_pm); @@ -437,13 +437,14 @@ static int migration_rx_fun(int argc, char *argv[]) static void reset_dirty_pages(msg_vm_t vm) { - msg_host_priv_t priv = sg_host_msg(vm); + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); char *key = nullptr; xbt_dict_cursor_t cursor = nullptr; dirty_page_t dp = nullptr; - if (!priv->dp_objs) return; - xbt_dict_foreach(priv->dp_objs, cursor, key, dp) { + if (!pimpl->dp_objs) + return; + xbt_dict_foreach (pimpl->dp_objs, cursor, key, dp) { double remaining = MSG_task_get_flops_amount(dp->task); dp->prev_clock = MSG_get_clock(); dp->prev_remaining = remaining; @@ -454,16 +455,16 @@ static void reset_dirty_pages(msg_vm_t vm) static void start_dirty_page_tracking(msg_vm_t vm) { - msg_host_priv_t priv = sg_host_msg(vm); - priv->dp_enabled = 1; + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); + pimpl->dp_enabled = 1; reset_dirty_pages(vm); } static void stop_dirty_page_tracking(msg_vm_t vm) { - msg_host_priv_t priv = sg_host_msg(vm); - priv->dp_enabled = 0; + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); + pimpl->dp_enabled = 0; } static double get_computed(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock) @@ -479,13 +480,13 @@ static double get_computed(char *key, msg_vm_t vm, dirty_page_t dp, double remai static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug, int stage2_round_for_fancy_debug) { - msg_host_priv_t priv = sg_host_msg(vm); + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); double total = 0; char *key = nullptr; xbt_dict_cursor_t cursor = nullptr; dirty_page_t dp = nullptr; - xbt_dict_foreach(priv->dp_objs, cursor, key, dp) { + xbt_dict_foreach (pimpl->dp_objs, cursor, key, dp) { double remaining = MSG_task_get_flops_amount(dp->task); double clock = MSG_get_clock(); @@ -497,65 +498,70 @@ static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug dp->prev_clock = clock; } - total += priv->dp_updated_by_deleted_tasks; + total += pimpl->dp_updated_by_deleted_tasks; - XBT_DEBUG("mig-stage%d.%d: computed %f flop_counts (including %f by deleted tasks)", - stage_for_fancy_debug, stage2_round_for_fancy_debug, total, priv->dp_updated_by_deleted_tasks); + XBT_DEBUG("mig-stage%d.%d: computed %f flop_counts (including %f by deleted tasks)", stage_for_fancy_debug, + stage2_round_for_fancy_debug, total, pimpl->dp_updated_by_deleted_tasks); - priv->dp_updated_by_deleted_tasks = 0; + pimpl->dp_updated_by_deleted_tasks = 0; return total; } // TODO Is this code redundant with the information provided by // msg_process_t MSG_process_create(const char *name, xbt_main_func_t code, void *data, msg_host_t host) +/** @brief take care of the dirty page tracking, in case we're adding a task to a migrating VM */ void MSG_host_add_task(msg_host_t host, msg_task_t task) { - msg_host_priv_t priv = sg_host_msg(host); + simgrid::s4u::VirtualMachine* vm = dynamic_cast(host); + if (vm == nullptr) + return; + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); + double remaining = MSG_task_get_flops_amount(task); char *key = bprintf("%s-%p", task->name, task); dirty_page_t dp = xbt_new0(s_dirty_page, 1); dp->task = task; - - /* It should be okay that we add a task onto a migrating VM. */ - if (priv->dp_enabled) { + if (pimpl->dp_enabled) { dp->prev_clock = MSG_get_clock(); dp->prev_remaining = remaining; } - if(!priv->dp_objs) priv->dp_objs = xbt_dict_new(); - xbt_assert(xbt_dict_get_or_null(priv->dp_objs, key) == nullptr); - xbt_dict_set(priv->dp_objs, key, dp, nullptr); - XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, sg_host_get_name(host), remaining, priv->dp_enabled); + if (!pimpl->dp_objs) + pimpl->dp_objs = xbt_dict_new(); + xbt_assert(xbt_dict_get_or_null(pimpl->dp_objs, key) == nullptr); + xbt_dict_set(pimpl->dp_objs, key, dp, nullptr); + XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, sg_host_get_name(host), remaining, pimpl->dp_enabled); xbt_free(key); } void MSG_host_del_task(msg_host_t host, msg_task_t task) { - msg_host_priv_t priv = sg_host_msg(host); + simgrid::s4u::VirtualMachine* vm = dynamic_cast(host); + if (vm == nullptr) + return; + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); char *key = bprintf("%s-%p", task->name, task); - - dirty_page_t dp = (dirty_page_t) (priv->dp_objs ? xbt_dict_get_or_null(priv->dp_objs, key) : NULL); + dirty_page_t dp = (dirty_page_t)(pimpl->dp_objs ? xbt_dict_get_or_null(pimpl->dp_objs, key) : NULL); xbt_assert(dp->task == task); /* If we are in the middle of dirty page tracking, we record how much computation has been done until now, and keep * the information for the lookup_() function that will called soon. */ - if (priv->dp_enabled) { + if (pimpl->dp_enabled) { double remaining = MSG_task_get_flops_amount(task); double clock = MSG_get_clock(); // double updated = calc_updated_pages(key, host, dp, remaining, clock); double updated = get_computed(key, host, dp, remaining, clock); - priv->dp_updated_by_deleted_tasks += updated; + pimpl->dp_updated_by_deleted_tasks += updated; } - if(priv->dp_objs) - xbt_dict_remove(priv->dp_objs, key); + if (pimpl->dp_objs) + xbt_dict_remove(pimpl->dp_objs, key); xbt_free(dp); XBT_DEBUG("del %s on %s", key, sg_host_get_name(host)); - xbt_free(key); } @@ -922,18 +928,18 @@ void MSG_vm_migrate(msg_vm_t vm, msg_host_t new_pm) if (MSG_vm_is_migrating(vm)) THROWF(vm_error, 0, "VM(%s) is already migrating", sg_host_get_name(vm)); - msg_host_priv_t priv = sg_host_msg(vm); - priv->is_migrating = 1; + simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm->pimpl_); + pimpl->isMigrating = 1; { int ret = do_migration(vm, old_pm, new_pm); if (ret == -1){ - priv->is_migrating = 0; - THROWF(host_error, 0, "SRC host failed during migration"); + pimpl->isMigrating = 0; + THROWF(host_error, 0, "SRC host failed during migration"); } else if(ret == -2){ - priv->is_migrating = 0; - THROWF(host_error, 0, "DST host failed during migration"); + pimpl->isMigrating = 0; + THROWF(host_error, 0, "DST host failed during migration"); } } diff --git a/src/s4u/s4u_VirtualMachine.cpp b/src/s4u/s4u_VirtualMachine.cpp index 8a8e190c39..d6d976e38c 100644 --- a/src/s4u/s4u_VirtualMachine.cpp +++ b/src/s4u/s4u_VirtualMachine.cpp @@ -24,6 +24,11 @@ VirtualMachine::~VirtualMachine() onDestruction(*this); } +bool VirtualMachine::isMigrating() +{ + return static_cast(pimpl_)->isMigrating; +} + /** @brief Retrieve a copy of the parameters of that VM/PM * @details The ramsize and overcommit fields are used on the PM too */ void VirtualMachine::parameters(vm_params_t params) diff --git a/src/simix/smx_host.cpp b/src/simix/smx_host.cpp index bd6371ed10..b7ff8a58c8 100644 --- a/src/simix/smx_host.cpp +++ b/src/simix/smx_host.cpp @@ -91,9 +91,8 @@ void SIMIX_host_off(sg_host_t h, smx_actor_t issuer) smx_actor_t process = nullptr; xbt_swag_foreach(process, host->process_list) { SIMIX_process_kill(process, issuer); - XBT_DEBUG("Killing %s on %s by %s", - process->name.c_str(), sg_host_get_name(process->host), - issuer->name.c_str()); + XBT_DEBUG("Killing %s@%s on behalf of %s", process->name.c_str(), sg_host_get_name(process->host), + issuer->name.c_str()); } } } else { diff --git a/src/surf/VirtualMachineImpl.cpp b/src/surf/VirtualMachineImpl.cpp index fbf47e5ae2..97b5613bdb 100644 --- a/src/surf/VirtualMachineImpl.cpp +++ b/src/surf/VirtualMachineImpl.cpp @@ -131,14 +131,31 @@ VirtualMachineImpl::VirtualMachineImpl(simgrid::s4u::Host* piface, simgrid::s4u: xbt_dynar_length(storage_)); } -/* - * A physical host does not disappear in the current SimGrid code, but a VM may disappear during a simulation. - */ +extern "C" int + xbt_log_no_loc; /* ugly pimpl to ensure that the debug info in the known issue below don't break the test */ +/** @brief A physical host does not disappear in the current SimGrid code, but a VM may disappear during a simulation */ VirtualMachineImpl::~VirtualMachineImpl() { onVmDestruction(this); allVms_.erase(find(allVms_.begin(), allVms_.end(), this)); + /* dirty page tracking */ + unsigned int size = xbt_dict_size(dp_objs); + static bool already_warned = false; + if (size > 0 && !already_warned) { + xbt_dict_cursor_t cursor = nullptr; + xbt_dict_cursor_first(dp_objs, &cursor); + XBT_WARN("Dirty page tracking: %u pending task(s) on a destroyed VM (first one is %s).\n" + "If you don't understand why your task was not properly removed, please report that bug.\n" + "This is a known bug if you turned the host off during the VM execution.\n" + "Please remind us of that problem at some point: our code base is not ready to fix this harmless issue in " + "2016, sorry.", + size, (xbt_log_no_loc ? "(name hidden)" : xbt_dict_cursor_get_key(cursor))); + xbt_dict_cursor_free(&cursor); + already_warned = true; + } + xbt_dict_free(&dp_objs); + /* Free the cpu_action of the VM. */ XBT_ATTRIB_UNUSED int ret = action_->unref(); xbt_assert(ret == 1, "Bug: some resource still remains"); diff --git a/src/surf/VirtualMachineImpl.hpp b/src/surf/VirtualMachineImpl.hpp index 8a8cc68461..53f948d22a 100644 --- a/src/surf/VirtualMachineImpl.hpp +++ b/src/surf/VirtualMachineImpl.hpp @@ -85,6 +85,11 @@ public: /* The vm object of the lower layer */ CpuAction* action_ = nullptr; + /* Dirty pages stuff */ + int dp_enabled = 0; + xbt_dict_t dp_objs = nullptr; + double dp_updated_by_deleted_tasks = 0; + protected: simgrid::s4u::Host* hostPM_; @@ -93,6 +98,8 @@ public: void setState(e_surf_vm_state_t state); static std::deque allVms_; + bool isMigrating = false; + private: s_vm_params_t params_; diff --git a/src/surf/VmHostExt.cpp b/src/surf/VmHostExt.cpp new file mode 100644 index 0000000000..a79060e73e --- /dev/null +++ b/src/surf/VmHostExt.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2013-2016. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "src/surf/VmHostExt.hpp" + +XBT_LOG_EXTERNAL_CATEGORY(surf_vm); +XBT_LOG_DEFAULT_CATEGORY(surf_vm); + +namespace simgrid { +namespace vm { +VmHostExt::~VmHostExt() +{ +} +} +} diff --git a/src/surf/VmHostExt.hpp b/src/surf/VmHostExt.hpp new file mode 100644 index 0000000000..6795ba2f9b --- /dev/null +++ b/src/surf/VmHostExt.hpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2004-2016. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include + +#include "src/surf/HostImpl.hpp" + +#ifndef VM_HOST_INFO_HPP_ +#define VM_HOST_INFO_HPP_ + +namespace simgrid { +namespace vm { +/** @brief Host extension for the VMs */ +class VmHostExt { + virtual ~VmHostExt(); + static simgrid::xbt::Extension EXTENSION_ID; +}; +} +} + +#endif /* VM_HOST_INFO_HPP_ */ diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh index 8d79c41251..dea218a436 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh @@ -84,6 +84,10 @@ $ ./host_on_off_processes ${srcdir:=.}/../../../examples/platforms/small_platfor > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Turn Jupiter off > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Shutdown vm0 > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Destroy vm0 +> [10.000000] [surf_vm/WARNING] Dirty page tracking: 1 pending task(s) on a destroyed VM (first one is (name hidden)). +> If you don't understand why your task was not properly removed, please report that bug. +> This is a known bug if you turned the host off during the VM execution. +> Please remind us of that problem at some point: our code base is not ready to fix this harmless issue in 2016, sorry. > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test 6 is also weird: when the node Jupiter is turned off once again, the VM and its daemon are not killed. However, the issue regarding the shutdown of hosted VMs can be seen a feature not a bug ;) > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test done. See you! > [10.000000] [msg_test/INFO] Simulation time 10 diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index c66284ecc9..b95d9cf067 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -82,6 +82,7 @@ set(EXTRA_DIST src/surf/surf_routing.hpp src/surf/PropertyHolder.hpp src/surf/VirtualMachineImpl.hpp + src/surf/VmHostExt.hpp src/surf/host_clm03.hpp src/surf/HostImpl.hpp src/surf/ptask_L07.hpp @@ -333,6 +334,7 @@ set(SURF_SRC src/surf/trace_mgr.hpp src/surf/trace_mgr.cpp src/surf/VirtualMachineImpl.cpp + src/surf/VmHostExt.cpp src/surf/host_clm03.cpp src/surf/HostImpl.cpp src/surf/ptask_L07.cpp