+static inline char *get_mig_process_rx_name(const char *vm_name, const char *src_pm_name, const char *dst_pm_name)
+{
+ return bprintf("__pr_mig_rx:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name);
+}
+
+static inline char *get_mig_task_name(const char *vm_name, const char *src_pm_name, const char *dst_pm_name, int stage)
+{
+ return bprintf("__task_mig_stage%d:%s(%s-%s)", stage, vm_name, src_pm_name, dst_pm_name);
+}
+
+static void launch_deferred_exec_process(msg_host_t host, double computation, double prio);
+
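+/* Destination-side process of a live migration. It receives the memory
+ * transfer tasks sent by the source on the migration mailbox, charges the
+ * per-byte CPU overhead of receiving them on the VM, and, once the stage-3
+ * finalize task arrives, moves the VM to the destination PM, resumes it, and
+ * acknowledges completion on the control mailbox with a stage-4 task. */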
+static int migration_rx_fun(int argc, char *argv[])
+{
+ XBT_DEBUG("mig: rx_start");
+
+ xbt_assert(argc == 4);
+ const char *vm_name = argv[1];
+ const char *src_pm_name = argv[2];
+ const char *dst_pm_name = argv[3];
+ msg_vm_t vm = MSG_get_host_by_name(vm_name);
+ msg_host_t dst_pm = MSG_get_host_by_name(dst_pm_name);
+
+
+ s_ws_params_t params;
+ simcall_host_get_params(vm, &params);
+ const double xfer_cpu_overhead = params.xfer_cpu_overhead;
+
+
+ int need_exit = 0;
+
+ char *mbox = get_mig_mbox_src_dst(vm_name, src_pm_name, dst_pm_name);
+ char *mbox_ctl = get_mig_mbox_ctl(vm_name, src_pm_name, dst_pm_name);
+ char *finalize_task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, 3);
+
+ for (;;) {
+ msg_task_t task = NULL;
+ MSG_task_recv(&task, mbox);
+ {
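+ /* Charge the CPU cost of receiving this chunk; the overhead is
+ * proportional to the number of bytes received. */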
+ double received = MSG_task_get_data_size(task);
+ /* TODO: clean up */
+ // const double alpha = 0.22L * 1.0E8 / (80L * 1024 * 1024);
+ launch_deferred_exec_process(vm, received * xfer_cpu_overhead, 1);
+ }
+
+ if (strcmp(task->name, finalize_task_name) == 0)
+ need_exit = 1;
+
+ MSG_task_destroy(task);
+
+ if (need_exit)
+ break;
+ }
+
+
+ simcall_vm_migrate(vm, dst_pm);
+ simcall_vm_resume(vm);
+
+ {
+ char *task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, 4);
+
+ msg_task_t task = MSG_task_create(task_name, 0, 0, NULL);
+ msg_error_t ret = MSG_task_send(task, mbox_ctl);
+ xbt_assert(ret == MSG_OK);
+
+ xbt_free(task_name);
+ }
+
+
+ xbt_free(mbox);
+ xbt_free(mbox_ctl);
+ xbt_free(finalize_task_name);
+
+ XBT_DEBUG("mig: rx_done");
+
+ return 0;
+}
+
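+/* Take a snapshot of the remaining amount of computation of every task
+ * registered on the VM, so that later calls can measure how much has been
+ * computed (and thus how much memory has presumably been dirtied) since now. */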
+static void reset_dirty_pages(msg_vm_t vm)
+{
+ msg_host_priv_t priv = msg_host_resource_priv(vm);
+
+ char *key = NULL;
+ xbt_dict_cursor_t cursor = NULL;
+ dirty_page_t dp = NULL;
+ xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
+ double remaining = MSG_task_get_remaining_computation(dp->task);
+ dp->prev_clock = MSG_get_clock();
+ dp->prev_remaining = remaining;
+
+ // XBT_INFO("%s@%s remaining %f", key, sg_host_name(vm), remaining);
+ }
+}
+
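+/* Enable dirty page tracking on the VM; presumably called by the migration
+ * code at the beginning of the memory transfer stages. */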
+static void start_dirty_page_tracking(msg_vm_t vm)
+{
+ msg_host_priv_t priv = msg_host_resource_priv(vm);
+ priv->dp_enabled = 1;
+
+ reset_dirty_pages(vm);
+}
+
+static void stop_dirty_page_tracking(msg_vm_t vm)
+{
+ msg_host_priv_t priv = msg_host_resource_priv(vm);
+ priv->dp_enabled = 0;
+}
+
+#if 0
+/* It might be natural to define dp_rate for each task. But, we would also have
+ * to consider how each task's behavior affects the memory update behavior at
+ * the operating system level. It may not be easy to model this with a simple algorithm. */
+double calc_updated_pages(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock)
+{
+ double computed = dp->prev_remaining - remaining;
+ double duration = clock - dp->prev_clock;
+ double updated = dp->task->dp_rate * computed;
+
+ XBT_INFO("%s@%s: computated %f ops (remaining %f -> %f) in %f secs (%f -> %f)",
+ key, sg_host_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock);
+ XBT_INFO("%s@%s: updated %f bytes, %f Mbytes/s",
+ key, sg_host_name(vm), updated, updated / duration / 1000 / 1000);
+
+ return updated;
+}
+#endif
+
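+/* Return the number of flops the task has computed since the last snapshot
+ * (i.e., since reset_dirty_pages() or the previous lookup). */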
+static double get_computed(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock)
+{
+ double computed = dp->prev_remaining - remaining;
+ double duration = clock - dp->prev_clock;
+
+ XBT_DEBUG("%s@%s: computated %f ops (remaining %f -> %f) in %f secs (%f -> %f)",
+ key, sg_host_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock);
+
+ return computed;
+}
+
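+/* Sum up, over all tasks on the VM, the flops computed since the last lookup.
+ * The caller can multiply this by a dirty page rate to estimate how much
+ * memory was updated during that interval. */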
+static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug, int stage2_round_for_fancy_debug)
+{
+ msg_host_priv_t priv = msg_host_resource_priv(vm);
+ double total = 0;
+
+ char *key = NULL;
+ xbt_dict_cursor_t cursor = NULL;
+ dirty_page_t dp = NULL;
+ xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
+ double remaining = MSG_task_get_remaining_computation(dp->task);
+ double clock = MSG_get_clock();
+
+ // total += calc_updated_pages(key, vm, dp, remaining, clock);
+ total += get_computed(key, vm, dp, remaining, clock);
+
+ dp->prev_remaining = remaining;
+ dp->prev_clock = clock;
+ }
+
+ total += priv->dp_updated_by_deleted_tasks;
+
+ XBT_INFO("mig-stage%d.%d: computed %f flop_counts (including %f by deleted tasks)",
+ stage_for_fancy_debug,
+ stage2_round_for_fancy_debug,
+ total, priv->dp_updated_by_deleted_tasks);
+
+ priv->dp_updated_by_deleted_tasks = 0;
+
+ return total;
+}
+
+// TODO Is this code redundant with the information provided by
+// msg_process_t MSG_process_create(const char *name, xbt_main_func_t code, void *data, msg_host_t host)
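+/* Register a newly started task in the dirty page bookkeeping of its host, so
+ * that its progress can be tracked while a migration is in flight. */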
+void MSG_host_add_task(msg_host_t host, msg_task_t task)
+{
+ msg_host_priv_t priv = msg_host_resource_priv(host);
+ double remaining = MSG_task_get_remaining_computation(task);
+ char *key = bprintf("%s-%lld", task->name, task->counter);
+
+ dirty_page_t dp = xbt_new0(s_dirty_page, 1);
+ dp->task = task;
+
+ /* It should be okay that we add a task onto a migrating VM. */
+ if (priv->dp_enabled) {
+ dp->prev_clock = MSG_get_clock();
+ dp->prev_remaining = remaining;
+ }
+
+ xbt_assert(xbt_dict_get_or_null(priv->dp_objs, key) == NULL);
+ xbt_dict_set(priv->dp_objs, key, dp, NULL);
+ XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, sg_host_name(host), remaining, priv->dp_enabled);
+
+ xbt_free(key);
+}
+
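+/* Unregister a finished task from the dirty page bookkeeping, accounting its
+ * final progress so that it is not lost by the next lookup. */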
+void MSG_host_del_task(msg_host_t host, msg_task_t task)
+{
+ msg_host_priv_t priv = msg_host_resource_priv(host);
+
+ char *key = bprintf("%s-%lld", task->name, task->counter);
+
+ dirty_page_t dp = xbt_dict_get_or_null(priv->dp_objs, key);
+ xbt_assert(dp->task == task);
+
+ /* If we are in the middle of dirty page tracking, we record how much
+ * computation has been done so far, and keep the information for the
+ * lookup_computed_flop_counts() function that will be called soon. */
+ if (priv->dp_enabled) {
+ double remaining = MSG_task_get_remaining_computation(task);
+ double clock = MSG_get_clock();
+ // double updated = calc_updated_pages(key, host, dp, remaining, clock);
+ double updated = get_computed(key, host, dp, remaining, clock);
+
+ priv->dp_updated_by_deleted_tasks += updated;
+ }
+
+ xbt_dict_remove(priv->dp_objs, key);
+ xbt_free(dp);
+
+ XBT_DEBUG("del %s on %s", key, sg_host_name(host));
+
+ xbt_free(key);
+}
+
+
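+/* Helper process that executes a given amount of computation at a given
+ * priority on the host it is started on, then terminates. */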
+static int deferred_exec_fun(int argc, char *argv[])
+{
+ xbt_assert(argc == 3);
+ const char *comp_str = argv[1];
+ double computation = atof(comp_str);
+ const char *prio_str = argv[2];
+ double prio = atof(prio_str);
+
+ msg_task_t task = MSG_task_create("__task_deferred", computaion, 0, NULL);
+ // XBT_INFO("exec deferred %f", computaion);
+
+ /* This deferred task represents the result of the VM activity (e.g., the CPU overhead of handling migration traffic). */
+ MSG_task_set_priority(task, prio);
+ MSG_task_execute(task);
+
+ MSG_task_destroy(task);
+
+ return 0;
+}
+
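+/* Spawn a deferred_exec_fun process on the given host; used to charge the
+ * CPU overhead of the migration traffic without blocking the caller. */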
+static void launch_deferred_exec_process(msg_host_t host, double computation, double prio)
+{
+ char *pr_name = bprintf("__pr_deferred_exec_%s", MSG_host_get_name(host));
+
+ int nargvs = 4;
+ char **argv = xbt_new(char *, nargvs);
+ argv[0] = xbt_strdup(pr_name);
+ argv[1] = bprintf("%lf", computation);
+ argv[2] = bprintf("%lf", prio);
+ argv[3] = NULL;
+
+ MSG_process_create_with_arguments(pr_name, deferred_exec_fun, NULL, host, nargvs - 1, argv);
+
+ xbt_free(pr_name);
+}
+
+
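+/* Helper process that executes every task posted to its mailbox, modeling the
+ * sender-side CPU overhead of a transfer, until the finalize task arrives. */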
+static int task_tx_overhead_fun(int argc, char *argv[])
+{
+ xbt_assert(argc == 2);
+ const char *mbox = argv[1];
+
+ int need_exit = 0;
+
+ // XBT_INFO("start %s", mbox);
+
+ for (;;) {
+ msg_task_t task = NULL;
+ MSG_task_recv(&task, mbox);
+
+ // XBT_INFO("task->name %s", task->name);
+
+ if (strcmp(task->name, "finalize_making_overhead") == 0)
+ need_exit = 1;
+
+ // XBT_INFO("exec");
+ // MSG_task_set_priority(task, 1000000);
+ MSG_task_execute(task);
+ MSG_task_destroy(task);
+
+ if (need_exit)
+ break;
+ }
+
+ // XBT_INFO("bye");
+
+ return 0;
+}
+
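+/* Start a task_tx_overhead_fun process on the current host, with a mailbox
+ * name derived from the communication task it will shadow. */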
+static void start_overhead_process(msg_task_t comm_task)
+{
+ char *pr_name = bprintf("__pr_task_tx_overhead_%s", MSG_task_get_name(comm_task));
+ char *mbox = bprintf("__mb_task_tx_overhead_%s", MSG_task_get_name(comm_task));
+
+ int nargvs = 3;
+ char **argv = xbt_new(char *, nargvs);
+ argv[0] = xbt_strdup(pr_name);
+ argv[1] = xbt_strdup(mbox);
+ argv[2] = NULL;
+
+ // XBT_INFO("micro start: mbox %s", mbox);
+ MSG_process_create_with_arguments(pr_name, task_tx_overhead_fun, NULL, MSG_host_self(), nargvs - 1, argv);
+
+ xbt_free(pr_name);
+ xbt_free(mbox);
+}
+
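+/* Ask the overhead process attached to comm_task to terminate by sending it
+ * the finalize task. */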
+static void shutdown_overhead_process(msg_task_t comm_task)
+{
+ char *mbox = bprintf("__mb_task_tx_overhead_%s", MSG_task_get_name(comm_task));
+
+ msg_task_t task = MSG_task_create("finalize_making_overhead", 0, 0, NULL);
+
+ // XBT_INFO("micro shutdown: mbox %s", mbox);
+ msg_error_t ret = MSG_task_send(task, mbox);
+ xbt_assert(ret == MSG_OK);
+
+ xbt_free(mbox);
+ // XBT_INFO("shutdown done");
+}
+
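+/* Post a computation task of the given size to the overhead process attached
+ * to comm_task, so that the corresponding CPU overhead is consumed. */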
+static void request_overhead(msg_task_t comm_task, double computation)
+{
+ char *mbox = bprintf("__mb_task_tx_overhead_%s", MSG_task_get_name(comm_task));
+
+ msg_task_t task = MSG_task_create("micro", computation, 0, NULL);
+
+ // XBT_INFO("req overhead");
+ msg_error_t ret = MSG_task_send(task, mbox);
+ xbt_assert(ret == MSG_OK);
+
+ xbt_free(mbox);
+}
+
+/* alpha is (floating_operations / bytes).