XBT_DEBUG("A new VM (%s) has been created", name);
- #ifdef HAVE_TRACING
TRACE_msg_vm_create(name, ind_pm);
- #endif
return ind_vm;
}
__MSG_host_destroy(vm);
- #ifdef HAVE_TRACING
TRACE_msg_vm_end(vm);
- #endif
}
{
simcall_vm_start(vm);
- #ifdef HAVE_TRACING
TRACE_msg_vm_start(vm);
- #endif
}
/* msg_vm_t equals to msg_host_t */
simcall_vm_shutdown(vm);
- // #ifdef HAVE_TRACING
// TRACE_msg_vm_(vm);
- // #endif
}
msg_host_t dst_pm = ms-> dst_pm;
msg_host_priv_t priv = msg_host_resource_priv(vm);
-// TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an inconsistent state
-// I should check with Takahiro in order to make this portion of code atomic
- /* deinstall the current affinity setting for the CPU */
- simcall_vm_set_affinity(vm, src_pm, 0);
-
- /* Update the vm location */
- simcall_vm_migrate(vm, dst_pm);
-
- /* Resume the VM */
- simcall_vm_resume(vm);
+// // TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an inconsistent state
+// // I should check with Takahiro in order to make this portion of code atomic
+// /* deinstall the current affinity setting for the CPU */
+// simcall_vm_set_affinity(vm, src_pm, 0);
+//
+// /* Update the vm location */
+// simcall_vm_migrate(vm, dst_pm);
+//
+// /* Resume the VM */
+// simcall_vm_resume(vm);
+//
+ simcall_vm_migratefrom_resumeto(vm, src_pm, dst_pm);
/* install the affinity setting of the VM on the destination pm */
{
msg_host_priv_t priv = msg_host_resource_priv(vm);
priv->is_migrating = 0;
XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", ms->vm->key, ms->src_pm->key, ms->dst_pm->key);
- #ifdef HAVE_TRACING
- TRACE_msg_vm_change_host(ms->vm, ms->src_pm, ms->dst_pm);
- #endif
-
+ TRACE_msg_vm_change_host(ms->vm, ms->src_pm, ms->dst_pm);
}
// Inform the SRC that the migration has been correctly performed
{
xbt_dict_cursor_t cursor = NULL;
dirty_page_t dp = NULL;
xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
- double remaining = MSG_task_get_remaining_computation(dp->task);
+ double remaining = MSG_task_get_flops_amount(dp->task);
dp->prev_clock = MSG_get_clock();
dp->prev_remaining = remaining;
xbt_dict_cursor_t cursor = NULL;
dirty_page_t dp = NULL;
xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
- double remaining = MSG_task_get_remaining_computation(dp->task);
+ double remaining = MSG_task_get_flops_amount(dp->task);
double clock = MSG_get_clock();
void MSG_host_add_task(msg_host_t host, msg_task_t task)
{
msg_host_priv_t priv = msg_host_resource_priv(host);
- double remaining = MSG_task_get_remaining_computation(task);
+ double remaining = MSG_task_get_flops_amount(task);
char *key = bprintf("%s-%p", task->name, task);
dirty_page_t dp = xbt_new0(s_dirty_page, 1);
* computation has been done until now, and keep the information for the
* lookup_() function that will be called soon. */
if (priv->dp_enabled) {
- double remaining = MSG_task_get_remaining_computation(task);
+ double remaining = MSG_task_get_flops_amount(task);
double clock = MSG_get_clock();
// double updated = calc_updated_pages(key, host, dp, remaining, clock);
double updated = get_computed(key, host, dp, remaining, clock);
-static void send_migration_data(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm,
- sg_size_t size, char *mbox, int stage, int stage2_round, double mig_speed)
+static sg_size_t send_migration_data(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm,
+ sg_size_t size, char *mbox, int stage, int stage2_round, double mig_speed, double timeout)
{
+ sg_size_t sent = 0;
char *task_name = get_mig_task_name(vm, src_pm, dst_pm, stage);
msg_task_t task = MSG_task_create(task_name, 0, size, NULL);
msg_error_t ret;
if (mig_speed > 0)
- ret = MSG_task_send_bounded(task, mbox, mig_speed);
+ ret = MSG_task_send_with_timeout_bounded(task, mbox, timeout, mig_speed);
else
ret = MSG_task_send(task, mbox);
xbt_free(task_name);
+
+ if (ret == MSG_OK) {
+ sent = size;
+ } else if (ret == MSG_TIMEOUT) {
+ sg_size_t remaining = MSG_task_get_remaining_communication(task);
+ sent = size - remaining;
+ XBT_INFO("timeout (%lf s) in sending_migration_data, remaining %llu bytes of %llu",
+ timeout, remaining, size);
+ }
+
+ /* FIXME: why try-and-catch is used here? */
if(ret == MSG_HOST_FAILURE){
- //XBT_INFO("SRC host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
- MSG_task_destroy(task);
- THROWF(host_error, 0, "SRC host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
- }else if(ret == MSG_TRANSFER_FAILURE){
- //XBT_INFO("DST host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
- MSG_task_destroy(task);
- THROWF(host_error, 0, "DST host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
+ //XBT_INFO("SRC host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
+ MSG_task_destroy(task);
+ THROWF(host_error, 0, "SRC host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
+ }else if(ret == MSG_TRANSFER_FAILURE){
+ //XBT_INFO("DST host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
+ MSG_task_destroy(task);
+ THROWF(host_error, 0, "DST host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
}
double clock_end = MSG_get_clock();
else
XBT_DEBUG("mig-stage%d: sent %llu duration %f actual_speed %f (target %f)", stage, size, duration, actual_speed, mig_speed);
+ return sent;
}
-static double get_updated_size(double computed, double dp_rate, double dp_cap)
+static sg_size_t get_updated_size(double computed, double dp_rate, double dp_cap)
{
double updated_size = computed * dp_rate;
XBT_DEBUG("updated_size %f dp_rate %f", updated_size, dp_rate);
updated_size = dp_cap;
}
- return updated_size;
+ return (sg_size_t) updated_size;
}
static double send_stage1(struct migration_session *ms,
datasize = remaining;
remaining -= datasize;
- send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, datasize, ms->mbox, 1, 0, mig_speed);
+ send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, datasize, ms->mbox, 1, 0, mig_speed, -1);
double computed = lookup_computed_flop_counts(ms->vm, 1, 0);
computed_total += computed;
}
const sg_size_t ramsize = params.ramsize;
const sg_size_t devsize = params.devsize;
const int skip_stage1 = params.skip_stage1;
- const int skip_stage2 = params.skip_stage2;
+ int skip_stage2 = params.skip_stage2;
const double dp_rate = params.dp_rate;
const double dp_cap = params.dp_cap;
const double mig_speed = params.mig_speed;
+ double max_downtime = params.max_downtime;
- msg_vm_t vm=ms->vm;
+ /* Hard-coded temporarily. FIXME */
+#define MIGRATION_TIMEOUT_DO_NOT_HARDCODE_ME 10000000.0
+ double mig_timeout = MIGRATION_TIMEOUT_DO_NOT_HARDCODE_ME;
double remaining_size = ramsize + devsize;
+ double threshold = 0.0;
+
+ /* check parameters */
+ if (ramsize == 0)
+ XBT_WARN("migrate a VM, but ramsize is zero");
- double max_downtime = params.max_downtime;
if (max_downtime == 0) {
XBT_WARN("use the default max_downtime value 30ms");
max_downtime = 0.03;
}
- double threshold = 0.00001; /* TODO: cleanup */
-
- /* setting up parameters has done */
-
-
- if (ramsize == 0)
- XBT_WARN("migrate a VM, but ramsize is zero");
-
-
- XBT_DEBUG("mig-stage1: remaining_size %f", remaining_size);
-
/* Stage1: send all memory pages to the destination. */
- start_dirty_page_tracking(vm);
+ XBT_DEBUG("mig-stage1: remaining_size %f", remaining_size);
+ start_dirty_page_tracking(ms->vm);
double computed_during_stage1 = 0;
if (!skip_stage1) {
double clock_prev_send = MSG_get_clock();
TRY {
- computed_during_stage1 = send_stage1(ms, ramsize, mig_speed, dp_rate, dp_cap);
- } CATCH_ANONYMOUS{
+ /* At stage 1, we do not need timeout. We have to send all the memory
+ * pages even though the duration of this transfer exceeds the timeout
+ * value. */
+ XBT_INFO("Stage 1: Gonna send %llu", ramsize);
+ sg_size_t sent = send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, ramsize, ms->mbox, 1, 0, mig_speed, -1);
+ remaining_size -= sent;
+ computed_during_stage1 = lookup_computed_flop_counts(ms->vm, 1, 0);
+
+ if (sent < ramsize) {
+ XBT_INFO("mig-stage1: timeout, force moving to stage 3");
+ skip_stage2 = 1;
+ } else if (sent > ramsize)
+ XBT_CRITICAL("bug");
+
+ } CATCH_ANONYMOUS {
//hostfailure (if you want to know whether this is the SRC or the DST please check directly in send_migration_data code)
// Stop the dirty page tracking and return (there is no memory space to release)
- stop_dirty_page_tracking(vm);
+ stop_dirty_page_tracking(ms->vm);
return 0;
}
- remaining_size -= ramsize;
double clock_post_send = MSG_get_clock();
+ mig_timeout -= (clock_post_send - clock_prev_send);
+ if (mig_timeout < 0) {
+ XBT_INFO("The duration of stage 1 exceeds the timeout value (%lf > %lf), skip stage 2",
+ (clock_post_send - clock_prev_send), MIGRATION_TIMEOUT_DO_NOT_HARDCODE_ME);
+ skip_stage2 = 1;
+ }
+
+ /* estimate bandwidth */
double bandwidth = ramsize / (clock_post_send - clock_prev_send);
threshold = get_threshold_value(bandwidth, max_downtime);
XBT_DEBUG("actual bandwidth %f (MB/s), threshold %f", bandwidth / 1024 / 1024, threshold);
* becomes smaller than the threshold value. */
if (skip_stage2)
goto stage3;
- if (max_downtime == 0) {
- XBT_WARN("no max_downtime parameter, skip stage2");
- goto stage3;
- }
int stage2_round = 0;
for (;;) {
- double updated_size = 0;
- if (stage2_round == 0) {
- /* just after stage1, nothing has been updated. But, we have to send the data updated during stage1 */
+ sg_size_t updated_size = 0;
+ if (stage2_round == 0) {
+ /* just after stage1, nothing has been updated. But, we have to send the
+ * data updated during stage1 */
updated_size = get_updated_size(computed_during_stage1, dp_rate, dp_cap);
} else {
double computed = lookup_computed_flop_counts(ms->vm, 2, stage2_round);
updated_size = get_updated_size(computed, dp_rate, dp_cap);
}
- XBT_DEBUG("mig-stage 2:%d updated_size %f computed_during_stage1 %f dp_rate %f dp_cap %f",
+ XBT_DEBUG("mig-stage 2:%d updated_size %llu computed_during_stage1 %f dp_rate %f dp_cap %f",
stage2_round, updated_size, computed_during_stage1, dp_rate, dp_cap);
- // if (stage2_round != 0) {
- // /* during stage1, we have already created overhead tasks */
- // double overhead = dpt_cpu_overhead * updated_size;
- // XBT_DEBUG("updated %f overhead %f", updated_size, overhead);
- // launch_deferred_exec_process(vm, overhead, 10000);
- // }
-
-
- {
- remaining_size += updated_size;
+ /* Check whether the remaining size is below the threshold value. If so,
+ * move to stage 3. */
+ remaining_size += updated_size;
+ XBT_DEBUG("mig-stage2.%d: remaining_size %f (%s threshold %f)", stage2_round,
+ remaining_size, (remaining_size < threshold) ? "<" : ">", threshold);
+ if (remaining_size < threshold)
+ break;
- XBT_DEBUG("mig-stage2.%d: remaining_size %f (%s threshold %f)", stage2_round,
- remaining_size, (remaining_size < threshold) ? "<" : ">", threshold);
-
- if (remaining_size < threshold)
- break;
- }
+ sg_size_t sent = 0;
double clock_prev_send = MSG_get_clock();
- TRY{
- send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, updated_size, ms->mbox, 2, stage2_round, mig_speed);
- }CATCH_ANONYMOUS{
+ TRY {
+ XBT_DEBUG("Stage 2, gonna send %llu", updated_size);
+ sent = send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, updated_size, ms->mbox, 2, stage2_round, mig_speed, mig_timeout);
+ } CATCH_ANONYMOUS {
//hostfailure (if you want to know whether this is the SRC or the DST please check directly in send_migration_data code)
// Stop the dirty page tracking and return (there is no memory space to release)
- stop_dirty_page_tracking(vm);
+ stop_dirty_page_tracking(ms->vm);
return 0;
}
double clock_post_send = MSG_get_clock();
- double bandwidth = updated_size / (clock_post_send - clock_prev_send);
- threshold = get_threshold_value(bandwidth, max_downtime);
- XBT_DEBUG("actual bandwidth %f, threshold %f", bandwidth / 1024 / 1024, threshold);
+ if (sent == updated_size) {
+ /* timeout did not happen */
+ double bandwidth = updated_size / (clock_post_send - clock_prev_send);
+ threshold = get_threshold_value(bandwidth, max_downtime);
+ XBT_DEBUG("actual bandwidth %f, threshold %f", bandwidth / 1024 / 1024, threshold);
+ remaining_size -= sent;
+ stage2_round += 1;
+ mig_timeout -= (clock_post_send - clock_prev_send);
+ xbt_assert(mig_timeout > 0);
+
+ } else if (sent < updated_size) {
+ /* When timeout happens, we move to stage 3. The size of memory pages
+ * updated before timeout must be added to the remaining size. */
+ XBT_INFO("mig-stage2.%d: timeout, force moving to stage 3. sent %llu / %llu, eta %lf",
+ stage2_round, sent, updated_size, (clock_post_send - clock_prev_send));
+ remaining_size -= sent;
+ double computed = lookup_computed_flop_counts(ms->vm, 2, stage2_round);
+ updated_size = get_updated_size(computed, dp_rate, dp_cap);
+ remaining_size += updated_size;
+ break;
- remaining_size -= updated_size;
- stage2_round += 1;
+ } else
+ XBT_CRITICAL("bug");
}
stage3:
/* Stage3: stop the VM and copy the rest of states. */
XBT_DEBUG("mig-stage3: remaining_size %f", remaining_size);
- simcall_vm_suspend(vm);
- stop_dirty_page_tracking(vm);
-
- TRY{
- send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, remaining_size, ms->mbox, 3, 0, mig_speed);
- }CATCH_ANONYMOUS{
- //hostfailure (if you want to know whether this is the SRC or the DST please check directly in send_migration_data code)
- // Stop the dirty page tracking an return (there is no memory space to release)
- simcall_vm_resume(vm);
- return 0;
- }
-
- // At that point the Migration is considered valid for the SRC node but remind that the DST side should relocate effectively the VM on the DST node.
+ simcall_vm_suspend(ms->vm);
+ stop_dirty_page_tracking(ms->vm);
+
+ TRY {
+ XBT_DEBUG("Stage 3: Gonna send %f", remaining_size);
+ send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, remaining_size, ms->mbox, 3, 0, mig_speed, -1);
+ } CATCH_ANONYMOUS {
+ //hostfailure (if you want to know whether this is the SRC or the DST please check directly in send_migration_data code)
+ // Dirty page tracking has already been stopped before this send; resume the VM and return (there is no memory space to release)
+ simcall_vm_resume(ms->vm);
+ return 0;
+ }
+ // At this point the migration is considered valid for the SRC node, but keep in mind that the DST side must still effectively relocate the VM on the DST node.
XBT_DEBUG("mig: tx_done");
return 0;
char *pr_rx_name = get_mig_process_rx_name(vm, src_pm, dst_pm);
char *pr_tx_name = get_mig_process_tx_name(vm, src_pm, dst_pm);
+// msg_process_t tx_process, rx_process;
// MSG_process_create(pr_rx_name, migration_rx_fun, ms, dst_pm);
// MSG_process_create(pr_tx_name, migration_tx_fun, ms, src_pm);
#if 1
char **argv = xbt_new(char *, 2);
argv[0] = pr_rx_name;
argv[1] = NULL;
- MSG_process_create_with_arguments(pr_rx_name, migration_rx_fun, ms, dst_pm, 1, argv);
+/*rx_process = */ MSG_process_create_with_arguments(pr_rx_name, migration_rx_fun, ms, dst_pm, 1, argv);
}
{
char **argv = xbt_new(char *, 2);
argv[0] = pr_tx_name;
argv[1] = NULL;
- MSG_process_create_with_arguments(pr_tx_name, migration_tx_fun, ms, src_pm, 1, argv);
+/* tx_process = */MSG_process_create_with_arguments(pr_tx_name, migration_tx_fun, ms, src_pm, 1, argv);
}
#endif
msg_task_t task = NULL;
msg_error_t ret = MSG_TIMEOUT;
while (ret == MSG_TIMEOUT && MSG_host_is_on(dst_pm)) //Wait while you receive the message o
- ret = MSG_task_receive_with_timeout(&task, ms->mbox_ctl, 10);
+ ret = MSG_task_receive_with_timeout(&task, ms->mbox_ctl, 4);
xbt_free(ms->mbox_ctl);
xbt_free(ms->mbox);
if(ret == MSG_HOST_FAILURE){
// Note that since the communication failed, the owner did not change and the task should be destroyed on the other side.
// Hence, just throw the exception
- //XBT_INFO("SRC crashes, throw an exception (m-control)");
- return -1;
- }
+ XBT_INFO("SRC crashes, throw an exception (m-control)");
+ //MSG_process_kill(tx_process); // Adrien, I made a merge on Nov 28th 2014, I'm not sure whether this line is required or not
+ return -1;
+ }
else if((ret == MSG_TRANSFER_FAILURE) || (ret == MSG_TIMEOUT)){ // MSG_TIMEOUT here means that MSG_host_is_avail() returned false.
- //XBT_INFO("DST crashes, throw an exception (m-control)");
- return -2;
+ XBT_INFO("DST crashes, throw an exception (m-control)");
+ return -2;
}
msg_host_t old_pm = simcall_vm_get_pm(vm);
+ if(MSG_host_is_off(old_pm))
+ THROWF(vm_error, 0, "SRC host(%s) seems off, cannot start a migration", sg_host_name(old_pm));
+
+ if(MSG_host_is_off(new_pm))
+ THROWF(vm_error, 0, "DST host(%s) seems off, cannot start a migration", sg_host_name(new_pm));
+
if (!MSG_vm_is_running(vm))
THROWF(vm_error, 0, "VM(%s) is not running", sg_host_name(vm));
// This part is done in the RX code, to handle the corner case where SRC can crash just at the end of the migration process
// In that case, the VM has been already assigned to the DST node.
//XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", vm->key, old_pm->key, new_pm->key);
- //#ifdef HAVE_TRACING
//TRACE_msg_vm_change_host(vm, old_pm, new_pm);
- //#endif
}
XBT_DEBUG("vm_suspend done");
- #ifdef HAVE_TRACING
TRACE_msg_vm_suspend(vm);
- #endif
}
{
simcall_vm_resume(vm);
- #ifdef HAVE_TRACING
TRACE_msg_vm_resume(vm);
- #endif
}
THROWF(vm_error, 0, "VM(%s) is migrating", sg_host_name(vm));
simcall_vm_save(vm);
- #ifdef HAVE_TRACING
TRACE_msg_vm_save(vm);
- #endif
}
/** @brief Restore the execution of the VM. All processes on the VM run again.
{
simcall_vm_restore(vm);
- #ifdef HAVE_TRACING
TRACE_msg_vm_restore(vm);
- #endif
}