+/* Enable dirty-page tracking on the given VM, discarding any counters
+ * left over from a previous tracking session. */
+static void start_dirty_page_tracking(msg_vm_t vm)
+{
+  msg_host_priv_t host_priv = msg_host_resource_priv(vm);
+
+  host_priv->dp_enabled = 1;
+  reset_dirty_pages(vm);
+}
+
+/* Disable dirty-page tracking on the given VM. */
+static void stop_dirty_page_tracking(msg_vm_t vm)
+{
+  msg_host_resource_priv(vm)->dp_enabled = 0;
+}
+
+/* Disabled draft: estimated the number of bytes dirtied by a task from
+ * the computation it performed (computed ops * per-task dp_rate).
+ * Kept for reference; get_computed() below is used instead, with a
+ * single per-VM dp_rate applied by the caller. */
+#if 0
+/* It might be natural that we define dp_rate for each task. But, we will also
+ * have to care about how each task behavior affects the memory update behavior
+ * at the operating system level. It may not be easy to model it with a simple algorithm. */
+double calc_updated_pages(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock)
+{
+ double computed = dp->prev_remaining - remaining;
+ double duration = clock - dp->prev_clock;
+ double updated = dp->task->dp_rate * computed;
+
+ XBT_INFO("%s@%s: computated %f ops (remaining %f -> %f) in %f secs (%f -> %f)",
+ key, sg_host_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock);
+ XBT_INFO("%s@%s: updated %f bytes, %f Mbytes/s",
+ key, sg_host_name(vm), updated, updated / duration / 1000 / 1000);
+
+ return updated;
+}
+#endif
+
+/* Return the number of flops that dp->task executed since the previous
+ * bookkeeping point (dp->prev_remaining / dp->prev_clock), and log the
+ * progress.  The caller is responsible for refreshing dp->prev_*.
+ * Fix: corrected the "computated" typo in the log message. */
+double get_computed(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock)
+{
+  double computed = dp->prev_remaining - remaining;
+  double duration = clock - dp->prev_clock;
+
+  XBT_DEBUG("%s@%s: computed %f ops (remaining %f -> %f) in %f secs (%f -> %f)",
+      key, sg_host_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock);
+
+  return computed;
+}
+
+/* Sum, over every task currently registered on @vm, the flops executed
+ * since the last call, plus the contribution of tasks deleted meanwhile.
+ * Side effects: refreshes each task's bookkeeping and clears the
+ * deleted-task accumulator. */
+static double lookup_computed_flop_counts(msg_vm_t vm, int stage2_round_for_fancy_debug)
+{
+  msg_host_priv_t priv = msg_host_resource_priv(vm);
+  xbt_dict_cursor_t cursor = NULL;
+  dirty_page_t dp = NULL;
+  char *key = NULL;
+  double total = 0;
+
+  xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
+    double clock = MSG_get_clock();
+    double remaining = MSG_task_get_remaining_computation(dp->task);
+
+    total += get_computed(key, vm, dp, remaining, clock);
+
+    /* Remember the current progress for the next round. */
+    dp->prev_remaining = remaining;
+    dp->prev_clock = clock;
+  }
+
+  /* Account for tasks removed since the previous lookup, then reset. */
+  total += priv->dp_updated_by_deleted_tasks;
+
+  XBT_INFO("mig-stage2.%d: computed %f flop_counts (including %f by deleted tasks)",
+      stage2_round_for_fancy_debug,
+      total, priv->dp_updated_by_deleted_tasks);
+
+  priv->dp_updated_by_deleted_tasks = 0;
+
+  return total;
+}
+
+// TODO Is this code redundant with the information provided by
+// msg_process_t MSG_process_create(const char *name, xbt_main_func_t code, void *data, msg_host_t host)
+/* Register @task in the dirty-page bookkeeping of @host.  If tracking is
+ * already active (a migration is in flight), snapshot the task's current
+ * progress so that only future computation counts as dirtying. */
+void MSG_host_add_task(msg_host_t host, msg_task_t task)
+{
+  msg_host_priv_t priv = msg_host_resource_priv(host);
+  char *key = bprintf("%s-%lld", task->name, task->counter);
+  double remaining = MSG_task_get_remaining_computation(task);
+
+  dirty_page_t dp = xbt_new0(s_dirty_page, 1);
+  dp->task = task;
+
+  /* It should be okay that we add a task onto a migrating VM. */
+  if (priv->dp_enabled) {
+    dp->prev_clock = MSG_get_clock();
+    dp->prev_remaining = remaining;
+  }
+
+  /* Each (name, counter) pair must be registered at most once. */
+  xbt_assert(xbt_dict_get_or_null(priv->dp_objs, key) == NULL);
+  xbt_dict_set(priv->dp_objs, key, dp, NULL);
+
+  XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, sg_host_name(host), remaining, priv->dp_enabled);
+  xbt_free(key);
+}
+
+/* Unregister @task from the dirty-page bookkeeping of @host.  If tracking
+ * is active, fold the task's progress since the last lookup into
+ * dp_updated_by_deleted_tasks so lookup_computed_flop_counts() still
+ * accounts for it. */
+void MSG_host_del_task(msg_host_t host, msg_task_t task)
+{
+  msg_host_priv_t priv = msg_host_resource_priv(host);
+
+  char *key = bprintf("%s-%lld", task->name, task->counter);
+
+  dirty_page_t dp = xbt_dict_get_or_null(priv->dp_objs, key);
+  /* Bug fix: the old code dereferenced dp (dp->task) before checking it for
+   * NULL, crashing with an unhelpful SEGV when the task was never added. */
+  xbt_assert(dp != NULL && dp->task == task, "task %s not found on %s", key, sg_host_name(host));
+
+  /* If we are in the middle of dirty page tracking, we record how much
+   * computation has been done until now, and keep the information for the
+   * lookup_() function that will be called soon. */
+  if (priv->dp_enabled) {
+    double remaining = MSG_task_get_remaining_computation(task);
+    double clock = MSG_get_clock();
+    double updated = get_computed(key, host, dp, remaining, clock);
+
+    priv->dp_updated_by_deleted_tasks += updated;
+  }
+
+  xbt_dict_remove(priv->dp_objs, key);
+  xbt_free(dp);
+
+  XBT_DEBUG("del %s on %s", key, sg_host_name(host));
+  xbt_free(key);
+}
+
+
+/* Create a dummy task of @size bytes and send it to @mbox, optionally
+ * capping the transfer rate at @mig_speed.  Aborts if the send fails. */
+static void send_migration_data(const char *vm_name, const char *src_pm_name, const char *dst_pm_name,
+    double size, char *mbox, int stage, int stage2_round, double mig_speed)
+{
+  char *task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, stage);
+  msg_task_t task = MSG_task_create(task_name, 0, size, NULL);
+
+  msg_error_t ret = (mig_speed > 0)
+      ? MSG_task_send_bounded(task, mbox, mig_speed)
+      : MSG_task_send(task, mbox);
+  xbt_assert(ret == MSG_OK);
+
+  if (stage == 2)
+    XBT_INFO("mig-stage%d.%d: sent %f", stage, stage2_round, size);
+  else
+    XBT_INFO("mig-stage%d: sent %f", stage, size);
+
+  xbt_free(task_name);
+}
+
+
+/* Sender side of a live migration (pre-copy algorithm):
+ *   stage 1: transfer the whole RAM while the VM keeps running;
+ *   stage 2: iteratively resend the pages dirtied meanwhile, until the
+ *            remaining state fits under the downtime threshold;
+ *   stage 3: suspend the VM and transfer the remaining state.
+ * argv: [0] process name, [1] VM name, [2] source PM, [3] destination PM.
+ * Fixes: removed the unused pr_name/host_name locals; repaired the
+ * mangled "&params" argument of simcall_host_get_params(). */
+static int migration_tx_fun(int argc, char *argv[])
+{
+  XBT_DEBUG("mig: tx_start");
+
+  xbt_assert(argc == 4);
+  const char *vm_name = argv[1];
+  const char *src_pm_name = argv[2];
+  const char *dst_pm_name = argv[3];
+  msg_vm_t vm = MSG_get_host_by_name(vm_name);
+
+  s_ws_params_t params;
+  simcall_host_get_params(vm, &params);
+  const long ramsize = params.ramsize;
+  const long devsize = params.devsize;
+  const int skip_stage2 = params.skip_stage2;
+  const double max_downtime = params.max_downtime;
+  const double dp_rate = params.dp_rate;
+  const double dp_cap = params.dp_cap;
+  const double mig_speed = params.mig_speed;
+  double remaining_size = ramsize + devsize;
+
+  /* Downtime budget converted to bytes, assuming 125 * 1000 * 1000 bytes/s
+   * (i.e. 1 Gbps).  NOTE(review): this link speed is hard-coded; confirm it
+   * matches the modeled migration bandwidth. */
+  double threshold = max_downtime * 125 * 1000 * 1000;
+
+  if (ramsize == 0)
+    XBT_WARN("migrate a VM, but ramsize is zero");
+
+  char *mbox = get_mig_mbox_src_dst(vm_name, src_pm_name, dst_pm_name);
+
+  XBT_INFO("mig-stage1: remaining_size %f", remaining_size);
+
+  /* Stage1: send all memory pages to the destination. */
+  start_dirty_page_tracking(vm);
+
+  send_migration_data(vm_name, src_pm_name, dst_pm_name, ramsize, mbox, 1, 0, mig_speed);
+
+  remaining_size -= ramsize;
+
+  /* Stage2: send updated pages iteratively until the size of remaining states
+   * becomes smaller than the threshold value. */
+  if (skip_stage2)
+    goto stage3;
+  if (max_downtime == 0) {
+    XBT_WARN("no max_downtime parameter, skip stage2");
+    goto stage3;
+  }
+
+  int stage2_round = 0;
+  for (;;) {
+    /* Estimate how many bytes were dirtied during the last transfer,
+     * capped by the working-set size. */
+    double updated_size = lookup_computed_flop_counts(vm, stage2_round) * dp_rate;
+    if (updated_size > dp_cap) {
+      XBT_INFO("mig-stage2.%d: %f bytes updated, but cap it with the working set size %f",
+          stage2_round, updated_size, dp_cap);
+      updated_size = dp_cap;
+    }
+
+    remaining_size += updated_size;
+
+    XBT_INFO("mig-stage2.%d: remaining_size %f (%s threshold %f)", stage2_round,
+        remaining_size, (remaining_size < threshold) ? "<" : ">", threshold);
+
+    if (remaining_size < threshold)
+      break;
+
+    send_migration_data(vm_name, src_pm_name, dst_pm_name, updated_size, mbox, 2, stage2_round, mig_speed);
+
+    remaining_size -= updated_size;
+    stage2_round += 1;
+  }
+
+stage3:
+  /* Stage3: stop the VM and copy the rest of states. */
+  XBT_INFO("mig-stage3: remaining_size %f", remaining_size);
+  simcall_vm_suspend(vm);
+  stop_dirty_page_tracking(vm);
+
+  send_migration_data(vm_name, src_pm_name, dst_pm_name, remaining_size, mbox, 3, 0, mig_speed);
+
+  xbt_free(mbox);
+
+  XBT_DEBUG("mig: tx_done");
+
+  return 0;
+}
+
+
+
+/* Drive a live migration of @vm from @src_pm to @dst_pm: spawn the
+ * receiver process on the destination and the sender process on the
+ * source, then block until the receiver reports completion (stage 4
+ * task) on the control mailbox.
+ * Fixes: removed the unused 'pr' process handles; destroy the received
+ * completion task, which was previously leaked. */
+static void do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm)
+{
+  char *mbox_ctl = get_mig_mbox_ctl(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm));
+
+  /* Spawn the receiver on the destination PM. */
+  {
+    char *pr_name = get_mig_process_rx_name(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm));
+    int nargvs = 5;
+    char **argv = xbt_new(char *, nargvs);
+    argv[0] = xbt_strdup(pr_name);
+    argv[1] = xbt_strdup(sg_host_name(vm));
+    argv[2] = xbt_strdup(sg_host_name(src_pm));
+    argv[3] = xbt_strdup(sg_host_name(dst_pm));
+    argv[4] = NULL;
+
+    MSG_process_create_with_arguments(pr_name, migration_rx_fun, NULL, dst_pm, nargvs - 1, argv);
+
+    xbt_free(pr_name);
+  }
+
+  /* Spawn the sender on the source PM. */
+  {
+    char *pr_name = get_mig_process_tx_name(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm));
+    int nargvs = 5;
+    char **argv = xbt_new(char *, nargvs);
+    argv[0] = xbt_strdup(pr_name);
+    argv[1] = xbt_strdup(sg_host_name(vm));
+    argv[2] = xbt_strdup(sg_host_name(src_pm));
+    argv[3] = xbt_strdup(sg_host_name(dst_pm));
+    argv[4] = NULL;
+
+    MSG_process_create_with_arguments(pr_name, migration_tx_fun, NULL, src_pm, nargvs - 1, argv);
+
+    xbt_free(pr_name);
+  }
+
+  /* Wait until the migration has finished. */
+  {
+    msg_task_t task = NULL;
+    msg_error_t ret = MSG_task_recv(&task, mbox_ctl);
+    xbt_assert(ret == MSG_OK);
+
+    char *expected_task_name = get_mig_task_name(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm), 4);
+    xbt_assert(strcmp(task->name, expected_task_name) == 0);
+    xbt_free(expected_task_name);
+
+    /* Bug fix: release the completion task instead of leaking it. */
+    MSG_task_destroy(task);
+  }
+
+  xbt_free(mbox_ctl);
+}
+
+
+/** @brief Migrate the VM to the given host.