X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/0a8e3d1982d853f0207c41b79cd91f4c6ebdd9f1..0eead244337fde8f0508629f8f8473f946c19f88:/src/msg/msg_vm.cpp diff --git a/src/msg/msg_vm.cpp b/src/msg/msg_vm.cpp index 6ca87e001e..83878d6522 100644 --- a/src/msg/msg_vm.cpp +++ b/src/msg/msg_vm.cpp @@ -13,6 +13,7 @@ #include "src/plugins/vm/VirtualMachineImpl.hpp" #include "src/plugins/vm/VmHostExt.hpp" +#include "src/simix/ActorImpl.hpp" #include #include @@ -172,7 +173,7 @@ msg_vm_t MSG_vm_create_core(msg_host_t pm, const char* name) void MSG_vm_destroy(msg_vm_t vm) { if (MSG_vm_is_migrating(vm)) - THROWF(vm_error, 0, "VM(%s) is migrating", vm->name().c_str()); + THROWF(vm_error, 0, "Cannot destroy VM '%s', which is migrating.", vm->cname()); /* First, terminate all processes on the VM if necessary */ if (MSG_vm_is_running(vm)) @@ -186,7 +187,7 @@ void MSG_vm_destroy(msg_vm_t vm) }); if (TRACE_msg_vm_is_enabled()) { - container_t container = PJ_container_get(vm->name().c_str()); + container_t container = PJ_container_get(vm->cname()); PJ_container_remove_from_parent(container); PJ_container_free(container); } @@ -195,14 +196,41 @@ void MSG_vm_destroy(msg_vm_t vm) /** @brief Start a vm (i.e., boot the guest operating system) * @ingroup msg_VMs * - * If the VM cannot be started, an exception is generated. + * If the VM cannot be started (because of memory overprovisionning), an exception is generated. */ void MSG_vm_start(msg_vm_t vm) { - simcall_vm_start(vm); + simgrid::simix::kernelImmediate([vm]() { + simgrid::vm::VmHostExt::ensureVmExtInstalled(); + + simgrid::s4u::VirtualMachine* typedVM = static_cast(vm); + simgrid::s4u::Host* pm = typedVM->pimpl_vm_->getPm(); + if (pm->extension() == nullptr) + pm->extension_set(new simgrid::vm::VmHostExt()); + + long pm_ramsize = pm->extension()->ramsize; + int pm_overcommit = pm->extension()->overcommit; + long vm_ramsize = typedVM->getRamsize(); + + if (pm_ramsize && !pm_overcommit) { /* Only verify that we don't overcommit on need */ + /* Retrieve the memory occupied by the VMs on that host. Yep, we have to traverse all VMs of all hosts for that */ + long total_ramsize_of_vms = 0; + for (simgrid::s4u::VirtualMachine* ws_vm : simgrid::vm::VirtualMachineImpl::allVms_) + if (pm == ws_vm->pimpl_vm_->getPm()) + total_ramsize_of_vms += ws_vm->pimpl_vm_->getRamsize(); + + if (vm_ramsize > pm_ramsize - total_ramsize_of_vms) { + XBT_WARN("cannnot start %s@%s due to memory shortage: vm_ramsize %ld, free %ld, pm_ramsize %ld (bytes).", + vm->cname(), pm->cname(), vm_ramsize, pm_ramsize - total_ramsize_of_vms, pm_ramsize); + THROWF(vm_error, 0, "Memory shortage on host '%s', VM '%s' cannot be started", pm->cname(), vm->cname()); + } + } + + typedVM->pimpl_vm_->setState(SURF_VM_STATE_RUNNING); + }); if (TRACE_msg_vm_is_enabled()) { - container_t vm_container = PJ_container_get(vm->name().c_str()); + container_t vm_container = PJ_container_get(vm->cname()); type_t type = PJ_type_get("MSG_VM_STATE", vm_container->type); val_t value = PJ_value_get_or_new("start", "0 0 1", type); // start is blue new_pajePushState(MSG_get_clock(), vm_container, type, value); @@ -218,55 +246,25 @@ void MSG_vm_start(msg_vm_t vm) void MSG_vm_shutdown(msg_vm_t vm) { simcall_vm_shutdown(vm); - MSG_process_sleep(0.); // Make sure that the processes in the VM are killed in this scheduling round before processing - // (eg with the VM destroy) -} -/* We have two mailboxes. mbox is used to transfer migration data between source and destination PMs. mbox_ctl is used - * to detect the completion of a migration. The names of these mailboxes must not conflict with others. */ -static inline char *get_mig_mbox_src_dst(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) -{ - const char *vm_name = sg_host_get_name(vm); - const char *src_pm_name = sg_host_get_name(src_pm); - const char *dst_pm_name = sg_host_get_name(dst_pm); - - return bprintf("__mbox_mig_src_dst:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name); -} - -static inline char *get_mig_mbox_ctl(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) -{ - const char *vm_name = sg_host_get_name(vm); - const char *src_pm_name = sg_host_get_name(src_pm); - const char *dst_pm_name = sg_host_get_name(dst_pm); - - return bprintf("__mbox_mig_ctl:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name); + // Make sure that the processes in the VM are killed in this scheduling round before processing + // (eg with the VM destroy) + MSG_process_sleep(0.); } static inline char *get_mig_process_tx_name(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) { - const char *vm_name = sg_host_get_name(vm); - const char *src_pm_name = sg_host_get_name(src_pm); - const char *dst_pm_name = sg_host_get_name(dst_pm); - - return bprintf("__pr_mig_tx:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name); + return bprintf("__pr_mig_tx:%s(%s-%s)", vm->cname(), src_pm->cname(), dst_pm->cname()); } static inline char *get_mig_process_rx_name(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) { - const char *vm_name = sg_host_get_name(vm); - const char *src_pm_name = sg_host_get_name(src_pm); - const char *dst_pm_name = sg_host_get_name(dst_pm); - - return bprintf("__pr_mig_rx:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name); + return bprintf("__pr_mig_rx:%s(%s-%s)", vm->cname(), src_pm->cname(), dst_pm->cname()); } static inline char *get_mig_task_name(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm, int stage) { - const char *vm_name = sg_host_get_name(vm); - const char *src_pm_name = sg_host_get_name(src_pm); - const char *dst_pm_name = sg_host_get_name(dst_pm); - - return bprintf("__task_mig_stage%d:%s(%s-%s)", stage, vm_name, src_pm_name, dst_pm_name); + return bprintf("__task_mig_stage%d:%s(%s-%s)", stage, vm->cname(), src_pm->cname(), dst_pm->cname()); } struct migration_session { @@ -288,34 +286,26 @@ static int migration_rx_fun(int argc, char *argv[]) // The structure has been created in the do_migration function and should only be freed in the same place ;) struct migration_session *ms = (migration_session *) MSG_process_get_data(MSG_process_self()); - s_vm_params_t params; - static_cast(ms->vm)->parameters(¶ms); - - int need_exit = 0; + bool received_finalize = false; char *finalize_task_name = get_mig_task_name(ms->vm, ms->src_pm, ms->dst_pm, 3); - - int ret = 0; - for (;;) { + while (!received_finalize) { msg_task_t task = nullptr; - ret = MSG_task_recv(&task, ms->mbox); - { - if (ret != MSG_OK) { - // An error occurred, clean the code and return - // The owner did not change, hence the task should be only destroyed on the other side - xbt_free(finalize_task_name); - return 0; - } + int ret = MSG_task_recv(&task, ms->mbox); + + if (ret != MSG_OK) { + // An error occurred, clean the code and return + // The owner did not change, hence the task should be only destroyed on the other side + xbt_free(finalize_task_name); + return 0; } if (strcmp(task->name, finalize_task_name) == 0) - need_exit = 1; + received_finalize = 1; MSG_task_destroy(task); - - if (need_exit) - break; } + xbt_free(finalize_task_name); // Here Stage 1, 2 and 3 have been performed. // Hence complete the migration @@ -343,8 +333,7 @@ static int migration_rx_fun(int argc, char *argv[]) { // Now the VM is running on the new host (the migration is completed) (even if the SRC crash) vm->pimpl_vm_->isMigrating = false; - XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", sg_host_get_name(ms->vm), sg_host_get_name(ms->src_pm), - sg_host_get_name(ms->dst_pm)); + XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", ms->vm->cname(), ms->src_pm->cname(), ms->dst_pm->cname()); if (TRACE_msg_vm_is_enabled()) { static long long int counter = 0; @@ -352,20 +341,20 @@ static int migration_rx_fun(int argc, char *argv[]) snprintf(key, INSTR_DEFAULT_STR_SIZE, "%lld", counter++); // start link - container_t msg = PJ_container_get(vm->name().c_str()); + container_t msg = PJ_container_get(vm->cname()); type_t type = PJ_type_get("MSG_VM_LINK", PJ_type_get_root()); new_pajeStartLink(MSG_get_clock(), PJ_container_get_root(), type, msg, "M", key); // destroy existing container of this vm - container_t existing_container = PJ_container_get(vm->name().c_str()); + container_t existing_container = PJ_container_get(vm->cname()); PJ_container_remove_from_parent(existing_container); PJ_container_free(existing_container); // create new container on the new_host location - PJ_container_new(vm->name().c_str(), INSTR_MSG_VM, PJ_container_get(sg_host_get_name(ms->dst_pm))); + PJ_container_new(vm->cname(), INSTR_MSG_VM, PJ_container_get(ms->dst_pm->cname())); // end link - msg = PJ_container_get(vm->name().c_str()); + msg = PJ_container_get(vm->cname()); type = PJ_type_get("MSG_VM_LINK", PJ_type_get_root()); new_pajeEndLink(MSG_get_clock(), PJ_container_get_root(), type, msg, "M", key); } @@ -388,21 +377,21 @@ static int migration_rx_fun(int argc, char *argv[]) xbt_free(task_name); } - xbt_free(finalize_task_name); - XBT_DEBUG("mig: rx_done"); return 0; } -static void reset_dirty_pages(msg_vm_t vm) +static void start_dirty_page_tracking(msg_vm_t vm) { - simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; + simgrid::vm::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; + + pimpl->dp_enabled = 1; + if (!pimpl->dp_objs) + return; char *key = nullptr; xbt_dict_cursor_t cursor = nullptr; dirty_page_t dp = nullptr; - if (!pimpl->dp_objs) - return; xbt_dict_foreach (pimpl->dp_objs, cursor, key, dp) { double remaining = MSG_task_get_flops_amount(dp->task); dp->prev_clock = MSG_get_clock(); @@ -412,13 +401,6 @@ static void reset_dirty_pages(msg_vm_t vm) } } -static void start_dirty_page_tracking(msg_vm_t vm) -{ - static_cast(vm)->pimpl_vm_->dp_enabled = 1; - - reset_dirty_pages(vm); -} - static void stop_dirty_page_tracking(msg_vm_t vm) { static_cast(vm)->pimpl_vm_->dp_enabled = 0; @@ -429,15 +411,15 @@ static double get_computed(char *key, msg_vm_t vm, dirty_page_t dp, double remai double computed = dp->prev_remaining - remaining; double duration = clock - dp->prev_clock; - XBT_DEBUG("%s@%s: computed %f ops (remaining %f -> %f) in %f secs (%f -> %f)", - key, sg_host_get_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock); + XBT_DEBUG("%s@%s: computed %f ops (remaining %f -> %f) in %f secs (%f -> %f)", key, vm->cname(), computed, + dp->prev_remaining, remaining, duration, dp->prev_clock, clock); return computed; } static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug, int stage2_round_for_fancy_debug) { - simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; + simgrid::vm::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; double total = 0; char *key = nullptr; @@ -473,7 +455,7 @@ void MSG_host_add_task(msg_host_t host, msg_task_t task) simgrid::s4u::VirtualMachine* vm = dynamic_cast(host); if (vm == nullptr) return; - simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; + simgrid::vm::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; double remaining = MSG_task_get_flops_amount(task); char *key = bprintf("%s-%p", task->name, task); @@ -485,10 +467,10 @@ void MSG_host_add_task(msg_host_t host, msg_task_t task) dp->prev_remaining = remaining; } if (!pimpl->dp_objs) - pimpl->dp_objs = xbt_dict_new(); + pimpl->dp_objs = xbt_dict_new_homogeneous(nullptr); xbt_assert(xbt_dict_get_or_null(pimpl->dp_objs, key) == nullptr); xbt_dict_set(pimpl->dp_objs, key, dp, nullptr); - XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, sg_host_get_name(host), remaining, pimpl->dp_enabled); + XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, host->cname(), remaining, pimpl->dp_enabled); xbt_free(key); } @@ -498,7 +480,7 @@ void MSG_host_del_task(msg_host_t host, msg_task_t task) simgrid::s4u::VirtualMachine* vm = dynamic_cast(host); if (vm == nullptr) return; - simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; + simgrid::vm::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; char *key = bprintf("%s-%p", task->name, task); dirty_page_t dp = (dirty_page_t)(pimpl->dp_objs ? xbt_dict_get_or_null(pimpl->dp_objs, key) : NULL); @@ -518,7 +500,7 @@ void MSG_host_del_task(msg_host_t host, msg_task_t task) xbt_dict_remove(pimpl->dp_objs, key); xbt_free(dp); - XBT_DEBUG("del %s on %s", key, sg_host_get_name(host)); + XBT_DEBUG("del %s on %s", key, host->cname()); xbt_free(key); } @@ -553,11 +535,11 @@ static sg_size_t send_migration_data(msg_vm_t vm, msg_host_t src_pm, msg_host_t if(ret == MSG_HOST_FAILURE){ //XBT_DEBUG("SRC host failed during migration of %s (stage %d)", sg_host_name(vm), stage); MSG_task_destroy(task); - THROWF(host_error, 0, "SRC host failed during migration of %s (stage %d)", sg_host_get_name(vm), stage); + THROWF(host_error, 0, "SRC host failed during migration of %s (stage %d)", vm->cname(), stage); }else if(ret == MSG_TRANSFER_FAILURE){ //XBT_DEBUG("DST host failed during migration of %s (stage %d)", sg_host_name(vm), stage); MSG_task_destroy(task); - THROWF(host_error, 0, "DST host failed during migration of %s (stage %d)", sg_host_get_name(vm), stage); + THROWF(host_error, 0, "DST host failed during migration of %s (stage %d)", vm->cname(), stage); } double clock_end = MSG_get_clock(); @@ -587,33 +569,6 @@ static sg_size_t get_updated_size(double computed, double dp_rate, double dp_cap return (sg_size_t) updated_size; } -static double send_stage1(struct migration_session* ms, sg_size_t ramsize, double mig_speed, double dp_rate, - double dp_cap) -{ - // const long chunksize = (sg_size_t)1024 * 1024 * 100; - const sg_size_t chunksize = (sg_size_t)1024 * 1024 * 100000; - sg_size_t remaining = ramsize; - double computed_total = 0; - - while (remaining > 0) { - sg_size_t datasize = chunksize; - if (remaining < chunksize) - datasize = remaining; - - remaining -= datasize; - send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, datasize, ms->mbox, 1, 0, mig_speed, -1); - double computed = lookup_computed_flop_counts(ms->vm, 1, 0); - computed_total += computed; - } - - return computed_total; -} - -static double get_threshold_value(double bandwidth, double max_downtime) -{ - return max_downtime * bandwidth; -} - static int migration_tx_fun(int argc, char *argv[]) { XBT_DEBUG("mig: tx_start"); @@ -632,9 +587,7 @@ static int migration_tx_fun(int argc, char *argv[]) const double mig_speed = params.mig_speed; double max_downtime = params.max_downtime; - /* hard code it temporally. Fix Me */ -#define MIGRATION_TIMEOUT_DO_NOT_HARDCODE_ME 10000000.0 - double mig_timeout = MIGRATION_TIMEOUT_DO_NOT_HARDCODE_ME; + double mig_timeout = 10000000.0; double remaining_size = (double) (ramsize + devsize); double threshold = 0.0; @@ -681,14 +634,13 @@ static int migration_tx_fun(int argc, char *argv[]) double clock_post_send = MSG_get_clock(); mig_timeout -= (clock_post_send - clock_prev_send); if (mig_timeout < 0) { - XBT_VERB("The duration of stage 1 exceeds the timeout value (%lf > %lf), skip stage 2", - (clock_post_send - clock_prev_send), MIGRATION_TIMEOUT_DO_NOT_HARDCODE_ME); + XBT_VERB("The duration of stage 1 exceeds the timeout value, skip stage 2"); skip_stage2 = 1; } /* estimate bandwidth */ double bandwidth = ramsize / (clock_post_send - clock_prev_send); - threshold = get_threshold_value(bandwidth, max_downtime); + threshold = bandwidth * max_downtime; XBT_DEBUG("actual bandwidth %f (MB/s), threshold %f", bandwidth / 1024 / 1024, threshold); } @@ -736,7 +688,7 @@ static int migration_tx_fun(int argc, char *argv[]) if (sent == updated_size) { /* timeout did not happen */ double bandwidth = updated_size / (clock_post_send - clock_prev_send); - threshold = get_threshold_value(bandwidth, max_downtime); + threshold = bandwidth * max_downtime; XBT_DEBUG("actual bandwidth %f, threshold %f", bandwidth / 1024 / 1024, threshold); remaining_size -= sent; stage2_round += 1; @@ -782,14 +734,51 @@ static int migration_tx_fun(int argc, char *argv[]) return 0; } -static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) +/** @brief Migrate the VM to the given host. + * @ingroup msg_VMs + */ +void MSG_vm_migrate(msg_vm_t vm, msg_host_t dst_pm) { + /* some thoughts: + * - One approach is ... + * We first create a new VM (i.e., destination VM) on the destination physical host. The destination VM will + * receive the state of the source + * VM over network. We will finally destroy the source VM. + * - This behavior is similar to the way of migration in the real world. + * Even before a migration is completed, we will see a destination VM, consuming resources. + * - We have to relocate all processes. The existing process migration code will work for this? + * - The name of the VM is a somewhat unique ID in the code. It is tricky for the destination VM? + * + * - Another one is ... + * We update the information of the given VM to place it to the destination physical host. + * + * The second one would be easier. + */ + + simgrid::s4u::VirtualMachine* typedVm = static_cast(vm); + simgrid::vm::VirtualMachineImpl* pimpl = typedVm->pimpl_vm_; + msg_host_t src_pm = pimpl->getPm(); + + if (src_pm->isOff()) + THROWF(vm_error, 0, "Cannot migrate VM '%s' from host '%s', which is offline.", vm->cname(), src_pm->cname()); + if (dst_pm->isOff()) + THROWF(vm_error, 0, "Cannot migrate VM '%s' to host '%s', which is offline.", vm->cname(), dst_pm->cname()); + if (!MSG_vm_is_running(vm)) + THROWF(vm_error, 0, "Cannot migrate VM '%s' that is not running yet.", vm->cname()); + if (typedVm->isMigrating()) + THROWF(vm_error, 0, "Cannot migrate VM '%s' that is already migrating.", vm->cname()); + + pimpl->isMigrating = true; + struct migration_session *ms = xbt_new(struct migration_session, 1); ms->vm = vm; ms->src_pm = src_pm; ms->dst_pm = dst_pm; - ms->mbox_ctl = get_mig_mbox_ctl(vm, src_pm, dst_pm); - ms->mbox = get_mig_mbox_src_dst(vm, src_pm, dst_pm); + + /* We have two mailboxes. mbox is used to transfer migration data between source and destination PMs. mbox_ctl is used + * to detect the completion of a migration. The names of these mailboxes must not conflict with others. */ + ms->mbox_ctl = bprintf("__mbox_mig_ctl:%s(%s-%s)", vm->cname(), src_pm->cname(), dst_pm->cname()); + ms->mbox = bprintf("__mbox_mig_src_dst:%s(%s-%s)", vm->cname(), src_pm->cname(), dst_pm->cname()); char *pr_rx_name = get_mig_process_rx_name(vm, src_pm, dst_pm); char *pr_tx_name = get_mig_process_tx_name(vm, src_pm, dst_pm); @@ -799,7 +788,7 @@ static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) argv[1] = nullptr; MSG_process_create_with_arguments(pr_rx_name, migration_rx_fun, ms, dst_pm, 1, argv); - char** argv = xbt_new(char*, 2); + argv = xbt_new(char*, 2); argv[0] = pr_tx_name; argv[1] = nullptr; MSG_process_create_with_arguments(pr_tx_name, migration_tx_fun, ms, src_pm, 1, argv); @@ -808,83 +797,33 @@ static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) XBT_DEBUG("wait for reception of the final ACK (i.e. migration has been correctly performed"); msg_task_t task = nullptr; msg_error_t ret = MSG_TIMEOUT; - while (ret == MSG_TIMEOUT && MSG_host_is_on(dst_pm)) // Wait while you receive the message o + while (ret == MSG_TIMEOUT && dst_pm->isOn()) // The rx will tell me when he gots the VM ret = MSG_task_receive_with_timeout(&task, ms->mbox_ctl, 4); + pimpl->isMigrating = false; + xbt_free(ms->mbox_ctl); xbt_free(ms->mbox); xbt_free(ms); - // xbt_assert(ret == MSG_OK); if (ret == MSG_HOST_FAILURE) { // Note that since the communication failed, the owner did not change and the task should be destroyed on the // other side. Hence, just throw the execption XBT_ERROR("SRC crashes, throw an exception (m-control)"); // MSG_process_kill(tx_process); // Adrien, I made a merge on Nov 28th 2014, I'm not sure whether this line is // required or not - return -1; + THROWF(host_error, 0, "Source host '%s' failed during the migration of VM '%s'.", src_pm->cname(), vm->cname()); } else if ((ret == MSG_TRANSFER_FAILURE) || (ret == MSG_TIMEOUT)) { // MSG_TIMEOUT here means that MSG_host_is_avail() returned false. XBT_ERROR("DST crashes, throw an exception (m-control)"); - return -2; + THROWF(host_error, 0, "Destination host '%s' failed during the migration of VM '%s'.", dst_pm->cname(), + vm->cname()); } char* expected_task_name = get_mig_task_name(vm, src_pm, dst_pm, 4); xbt_assert(strcmp(task->name, expected_task_name) == 0); xbt_free(expected_task_name); MSG_task_destroy(task); - return 0; -} - -/** @brief Migrate the VM to the given host. - * @ingroup msg_VMs - * - * FIXME: No migration cost occurs. If you want to simulate this too, you want to use a MSG_task_send() before or after, - * depending on whether you want to do cold or hot migration. - */ -void MSG_vm_migrate(msg_vm_t vm, msg_host_t new_pm) -{ - /* some thoughts: - * - One approach is ... - * We first create a new VM (i.e., destination VM) on the destination physical host. The destination VM will - * receive the state of the source - * VM over network. We will finally destroy the source VM. - * - This behavior is similar to the way of migration in the real world. - * Even before a migration is completed, we will see a destination VM, consuming resources. - * - We have to relocate all processes. The existing process migration code will work for this? - * - The name of the VM is a somewhat unique ID in the code. It is tricky for the destination VM? - * - * - Another one is ... - * We update the information of the given VM to place it to the destination physical host. - * - * The second one would be easier. - */ - - simgrid::surf::VirtualMachineImpl* pimpl = static_cast(vm)->pimpl_vm_; - msg_host_t old_pm = pimpl->getPm(); - - if(MSG_host_is_off(old_pm)) - THROWF(vm_error, 0, "SRC host(%s) seems off, cannot start a migration", sg_host_get_name(old_pm)); - - if(MSG_host_is_off(new_pm)) - THROWF(vm_error, 0, "DST host(%s) seems off, cannot start a migration", sg_host_get_name(new_pm)); - - if (!MSG_vm_is_running(vm)) - THROWF(vm_error, 0, "VM(%s) is not running", sg_host_get_name(vm)); - - if (MSG_vm_is_migrating(vm)) - THROWF(vm_error, 0, "VM(%s) is already migrating", sg_host_get_name(vm)); - - pimpl->isMigrating = 1; - - int ret = do_migration(vm, old_pm, new_pm); - if (ret == -1) { - pimpl->isMigrating = 0; - THROWF(host_error, 0, "SRC host failed during migration"); - } else if (ret == -2) { - pimpl->isMigrating = 0; - THROWF(host_error, 0, "DST host failed during migration"); - } } /** @brief Immediately suspend the execution of all processes within the given VM. @@ -898,14 +837,14 @@ void MSG_vm_migrate(msg_vm_t vm, msg_host_t new_pm) void MSG_vm_suspend(msg_vm_t vm) { if (MSG_vm_is_migrating(vm)) - THROWF(vm_error, 0, "VM(%s) is migrating", sg_host_get_name(vm)); + THROWF(vm_error, 0, "Cannot suspend VM '%s', which is migrating", vm->cname()); simcall_vm_suspend(vm); XBT_DEBUG("vm_suspend done"); if (TRACE_msg_vm_is_enabled()) { - container_t vm_container = PJ_container_get(vm->name().c_str()); + container_t vm_container = PJ_container_get(vm->cname()); type_t type = PJ_type_get("MSG_VM_STATE", vm_container->type); val_t value = PJ_value_get_or_new("suspend", "1 0 0", type); // suspend is red new_pajePushState(MSG_get_clock(), vm_container, type, value); @@ -922,7 +861,7 @@ void MSG_vm_resume(msg_vm_t vm) simcall_vm_resume(vm); if (TRACE_msg_vm_is_enabled()) { - container_t vm_container = PJ_container_get(vm->name().c_str()); + container_t vm_container = PJ_container_get(vm->cname()); type_t type = PJ_type_get("MSG_VM_STATE", vm_container->type); new_pajePopState(MSG_get_clock(), vm_container, type); } @@ -941,12 +880,12 @@ void MSG_vm_resume(msg_vm_t vm) void MSG_vm_save(msg_vm_t vm) { if (MSG_vm_is_migrating(vm)) - THROWF(vm_error, 0, "VM(%s) is migrating", sg_host_get_name(vm)); + THROWF(vm_error, 0, "Cannot save VM '%s', which is migrating.", vm->cname()); simcall_vm_save(vm); if (TRACE_msg_vm_is_enabled()) { - container_t vm_container = PJ_container_get(vm->name().c_str()); + container_t vm_container = PJ_container_get(vm->cname()); type_t type = PJ_type_get("MSG_VM_STATE", vm_container->type); val_t value = PJ_value_get_or_new("save", "0 1 0", type); // save is green new_pajePushState(MSG_get_clock(), vm_container, type, value); @@ -961,10 +900,25 @@ void MSG_vm_save(msg_vm_t vm) */ void MSG_vm_restore(msg_vm_t vm) { - simcall_vm_restore(vm); + simgrid::simix::kernelImmediate([vm]() { + if (static_cast(vm)->pimpl_vm_->getState() != SURF_VM_STATE_SAVED) + THROWF(vm_error, 0, "VM(%s) was not saved", vm->cname()); + + XBT_DEBUG("restore VM(%s), where %d processes exist", vm->cname(), xbt_swag_size(sg_host_simix(vm)->process_list)); + + /* jump to vm_ws_restore() */ + static_cast(vm)->pimpl_vm_->restore(); + + smx_actor_t smx_process, smx_process_safe; + xbt_swag_foreach_safe(smx_process, smx_process_safe, sg_host_simix(vm)->process_list) + { + XBT_DEBUG("resume %s", smx_process->name.c_str()); + SIMIX_process_resume(smx_process); + } + }); if (TRACE_msg_vm_is_enabled()) { - container_t vm_container = PJ_container_get(vm->name().c_str()); + container_t vm_container = PJ_container_get(vm->cname()); type_t type = PJ_type_get("MSG_VM_STATE", vm_container->type); new_pajePopState(MSG_get_clock(), vm_container, type); } @@ -1006,5 +960,6 @@ msg_host_t MSG_vm_get_pm(msg_vm_t vm) */ void MSG_vm_set_bound(msg_vm_t vm, double bound) { - simcall_vm_set_bound(vm, bound); + simgrid::simix::kernelImmediate( + [vm, bound]() { static_cast(vm)->pimpl_vm_->setBound(bound); }); }