X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/539916de848562683cf2e5425d5160c2a43f135a..5397eadb7c0c0482c2a0e658f772d3c165f4a44a:/src/msg/msg_vm.c diff --git a/src/msg/msg_vm.c b/src/msg/msg_vm.c index 5591e08331..ac42d90c75 100644 --- a/src/msg/msg_vm.c +++ b/src/msg/msg_vm.c @@ -221,9 +221,7 @@ msg_vm_t MSG_vm_create_core(msg_host_t ind_pm, const char *name) XBT_DEBUG("A new VM (%s) has been created", name); - #ifdef HAVE_TRACING TRACE_msg_vm_create(name, ind_pm); - #endif return ind_vm; } @@ -250,9 +248,7 @@ void MSG_vm_destroy(msg_vm_t vm) __MSG_host_destroy(vm); - #ifdef HAVE_TRACING TRACE_msg_vm_end(vm); - #endif } @@ -266,9 +262,7 @@ void MSG_vm_start(msg_vm_t vm) { simcall_vm_start(vm); - #ifdef HAVE_TRACING TRACE_msg_vm_start(vm); - #endif } @@ -284,9 +278,7 @@ void MSG_vm_shutdown(msg_vm_t vm) /* msg_vm_t equals to msg_host_t */ simcall_vm_shutdown(vm); - // #ifdef HAVE_TRACING // TRACE_msg_vm_(vm); - // #endif } @@ -400,16 +392,18 @@ static int migration_rx_fun(int argc, char *argv[]) msg_host_t dst_pm = ms-> dst_pm; msg_host_priv_t priv = msg_host_resource_priv(vm); -// TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an inconsistent state -// I should check with Takahiro in order to make this portion of code atomic - /* deinstall the current affinity setting for the CPU */ - simcall_vm_set_affinity(vm, src_pm, 0); - - /* Update the vm location */ - simcall_vm_migrate(vm, dst_pm); - - /* Resume the VM */ - simcall_vm_resume(vm); +// // TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an inconsistent state +// // I should check with Takahiro in order to make this portion of code atomic +// /* deinstall the current affinity setting for the CPU */ +// simcall_vm_set_affinity(vm, src_pm, 0); +// +// /* Update the vm location */ +// simcall_vm_migrate(vm, dst_pm); +// +// /* Resume the VM */ +// simcall_vm_resume(vm); +// + simcall_vm_migratefrom_resumeto(vm, src_pm, dst_pm); /* install the affinity setting of the VM on the destination pm */ { @@ -425,10 +419,7 @@ static int migration_rx_fun(int argc, char *argv[]) msg_host_priv_t priv = msg_host_resource_priv(vm); priv->is_migrating = 0; XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", ms->vm->key, ms->src_pm->key, ms->dst_pm->key); - #ifdef HAVE_TRACING - TRACE_msg_vm_change_host(ms->vm, ms->src_pm, ms->dst_pm); - #endif - + TRACE_msg_vm_change_host(ms->vm, ms->src_pm, ms->dst_pm); } // Inform the SRC that the migration has been correctly performed { @@ -464,7 +455,7 @@ static void reset_dirty_pages(msg_vm_t vm) xbt_dict_cursor_t cursor = NULL; dirty_page_t dp = NULL; xbt_dict_foreach(priv->dp_objs, cursor, key, dp) { - double remaining = MSG_task_get_remaining_computation(dp->task); + double remaining = MSG_task_get_flops_amount(dp->task); dp->prev_clock = MSG_get_clock(); dp->prev_remaining = remaining; @@ -525,7 +516,7 @@ static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug xbt_dict_cursor_t cursor = NULL; dirty_page_t dp = NULL; xbt_dict_foreach(priv->dp_objs, cursor, key, dp) { - double remaining = MSG_task_get_remaining_computation(dp->task); + double remaining = MSG_task_get_flops_amount(dp->task); double clock = MSG_get_clock(); @@ -556,7 +547,7 @@ static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug void MSG_host_add_task(msg_host_t host, msg_task_t task) { msg_host_priv_t priv = msg_host_resource_priv(host); - double remaining = MSG_task_get_remaining_computation(task); + double remaining = MSG_task_get_flops_amount(task); char *key = bprintf("%s-%p", task->name, task); dirty_page_t dp = xbt_new0(s_dirty_page, 1); @@ -588,7 +579,7 @@ void MSG_host_del_task(msg_host_t host, msg_task_t task) * computation has been done until now, and keep the information for the * lookup_() function that will called soon. */ if (priv->dp_enabled) { - double remaining = MSG_task_get_remaining_computation(task); + double remaining = MSG_task_get_flops_amount(task); double clock = MSG_get_clock(); // double updated = calc_updated_pages(key, host, dp, remaining, clock); double updated = get_computed(key, host, dp, remaining, clock); @@ -746,6 +737,7 @@ static int migration_tx_fun(int argc, char *argv[]) /* At stage 1, we do not need timeout. We have to send all the memory * pages even though the duration of this tranfer exceeds the timeout * value. */ + XBT_INFO("Stage 1: Gonna send %llu", ramsize); sg_size_t sent = send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, ramsize, ms->mbox, 1, 0, mig_speed, -1); remaining_size -= sent; computed_during_stage1 = lookup_computed_flop_counts(ms->vm, 1, 0); @@ -813,6 +805,7 @@ static int migration_tx_fun(int argc, char *argv[]) sg_size_t sent = 0; double clock_prev_send = MSG_get_clock(); TRY { + XBT_DEBUG("Stage 2, gonna send %llu", updated_size); sent = send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, updated_size, ms->mbox, 2, stage2_round, mig_speed, mig_timeout); } CATCH_ANONYMOUS { //hostfailure (if you want to know whether this is the SRC or the DST please check directly in send_migration_data code) @@ -856,6 +849,7 @@ stage3: stop_dirty_page_tracking(ms->vm); TRY { + XBT_DEBUG("Stage 3: Gonna send %f", remaining_size); send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, remaining_size, ms->mbox, 3, 0, mig_speed, -1); } CATCH_ANONYMOUS { //hostfailure (if you want to know whether this is the SRC or the DST please check directly in send_migration_data code) @@ -865,7 +859,7 @@ stage3: } // At that point the Migration is considered valid for the SRC node but remind that the DST side should relocate effectively the VM on the DST node. - XBT_INFO("mig: tx_done"); + XBT_DEBUG("mig: tx_done"); return 0; } @@ -885,7 +879,7 @@ static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) char *pr_rx_name = get_mig_process_rx_name(vm, src_pm, dst_pm); char *pr_tx_name = get_mig_process_tx_name(vm, src_pm, dst_pm); - msg_process_t tx_process, rx_process; +// msg_process_t tx_process, rx_process; // MSG_process_create(pr_rx_name, migration_rx_fun, ms, dst_pm); // MSG_process_create(pr_tx_name, migration_tx_fun, ms, src_pm); #if 1 @@ -893,13 +887,13 @@ static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) char **argv = xbt_new(char *, 2); argv[0] = pr_rx_name; argv[1] = NULL; - rx_process = MSG_process_create_with_arguments(pr_rx_name, migration_rx_fun, ms, dst_pm, 1, argv); +/*rx_process = */ MSG_process_create_with_arguments(pr_rx_name, migration_rx_fun, ms, dst_pm, 1, argv); } { char **argv = xbt_new(char *, 2); argv[0] = pr_tx_name; argv[1] = NULL; - tx_process = MSG_process_create_with_arguments(pr_tx_name, migration_tx_fun, ms, src_pm, 1, argv); +/* tx_process = */MSG_process_create_with_arguments(pr_tx_name, migration_tx_fun, ms, src_pm, 1, argv); } #endif @@ -909,7 +903,7 @@ static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) msg_task_t task = NULL; msg_error_t ret = MSG_TIMEOUT; while (ret == MSG_TIMEOUT && MSG_host_is_on(dst_pm)) //Wait while you receive the message o - ret = MSG_task_receive_with_timeout(&task, ms->mbox_ctl, 3); + ret = MSG_task_receive_with_timeout(&task, ms->mbox_ctl, 4); xbt_free(ms->mbox_ctl); xbt_free(ms->mbox); @@ -920,7 +914,7 @@ static int do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm) // Note that since the communication failed, the owner did not change and the task should be destroyed on the other side. // Hence, just throw the execption XBT_INFO("SRC crashes, throw an exception (m-control)"); - MSG_process_kill(tx_process); // Adrien, I made a merge on Nov 28th 2014, I'm not sure whether this line is required or not + //MSG_process_kill(tx_process); // Adrien, I made a merge on Nov 28th 2014, I'm not sure whether this line is required or not return -1; } else if((ret == MSG_TRANSFER_FAILURE) || (ret == MSG_TIMEOUT)){ // MSG_TIMEOUT here means that MSG_host_is_avail() returned false. @@ -972,6 +966,12 @@ void MSG_vm_migrate(msg_vm_t vm, msg_host_t new_pm) msg_host_t old_pm = simcall_vm_get_pm(vm); + if(MSG_host_is_off(old_pm)) + THROWF(vm_error, 0, "SRC host(%s) seems off, cannot start a migration", sg_host_name(old_pm)); + + if(MSG_host_is_off(new_pm)) + THROWF(vm_error, 0, "DST host(%s) seems off, cannot start a migration", sg_host_name(new_pm)); + if (!MSG_vm_is_running(vm)) THROWF(vm_error, 0, "VM(%s) is not running", sg_host_name(vm)); @@ -997,9 +997,7 @@ void MSG_vm_migrate(msg_vm_t vm, msg_host_t new_pm) // This part is done in the RX code, to handle the corner case where SRC can crash just at the end of the migration process // In that case, the VM has been already assigned to the DST node. //XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", vm->key, old_pm->key, new_pm->key); - //#ifdef HAVE_TRACING //TRACE_msg_vm_change_host(vm, old_pm, new_pm); - //#endif } @@ -1020,9 +1018,7 @@ void MSG_vm_suspend(msg_vm_t vm) XBT_DEBUG("vm_suspend done"); - #ifdef HAVE_TRACING TRACE_msg_vm_suspend(vm); - #endif } @@ -1035,9 +1031,7 @@ void MSG_vm_resume(msg_vm_t vm) { simcall_vm_resume(vm); - #ifdef HAVE_TRACING TRACE_msg_vm_resume(vm); - #endif } @@ -1057,9 +1051,7 @@ void MSG_vm_save(msg_vm_t vm) THROWF(vm_error, 0, "VM(%s) is migrating", sg_host_name(vm)); simcall_vm_save(vm); - #ifdef HAVE_TRACING TRACE_msg_vm_save(vm); - #endif } /** @brief Restore the execution of the VM. All processes on the VM run again. @@ -1073,9 +1065,7 @@ void MSG_vm_restore(msg_vm_t vm) { simcall_vm_restore(vm); - #ifdef HAVE_TRACING TRACE_msg_vm_restore(vm); - #endif }