src/msg/msg_vm.c

   1 /* Copyright (c) 2012-2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 // QUESTIONS:
   8 // 1./ check how and where a new VM is added to the list of the hosts
   9 // 2./ Diff between SIMIX_Actions and SURF_Actions
// => SIMIX_actions: synchronization points between processes (theoretically speaking, I do not have to create such a SIMIX_ACTION)
  11 // =>  Surf_Actions
  12
  13 // TODO
  14 //      MSG_TRACE can be revisited in order to use  the host
  15 //      To implement a mixed model between workstation and vm_workstation,
  16 //     please give a look at surf_model_private_t model_private at SURF Level and to the share resource functions
  17 //     double (*share_resources) (double now);
  18 //      For the action into the vm workstation model, we should be able to leverage the usual one (and if needed, look at
  19 //              the workstation model.
  20
  21 #include "msg_private.h"
  22 #include "xbt/sysdep.h"
  23 #include "xbt/log.h"
  24 #include "simgrid/platf.h"
  25
  26 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(msg_vm, msg,
  27                                 "Cloud-oriented parts of the MSG API");
  28
  29
  30 /* **** ******** GENERAL ********* **** */
  31
  32 /** \ingroup m_vm_management
  33  * \brief Returns the value of a given vm property
  34  *
  35  * \param vm a vm
  36  * \param name a property name
  37  * \return value of a property (or NULL if property not set)
  38  */
  39
  40 const char *MSG_vm_get_property_value(msg_vm_t vm, const char *name)
  41 {
  42   return MSG_host_get_property_value(vm, name);
  43 }
  44
  45 /** \ingroup m_vm_management
  46  * \brief Returns a xbt_dict_t consisting of the list of properties assigned to this host
  47  *
  48  * \param vm a vm
  49  * \return a dict containing the properties
  50  */
  51 xbt_dict_t MSG_vm_get_properties(msg_vm_t vm)
  52 {
  53   xbt_assert((vm != NULL), "Invalid parameters (vm is NULL)");
  54
  55   return (simcall_host_get_properties(vm));
  56 }
  57
  58 /** \ingroup m_host_management
  59  * \brief Change the value of a given host property
  60  *
  61  * \param vm a vm
  62  * \param name a property name
  63  * \param value what to change the property to
  64  * \param free_ctn the freeing function to use to kill the value on need
  65  */
  66 void MSG_vm_set_property_value(msg_vm_t vm, const char *name, void *value, void_f_pvoid_t free_ctn)
  67 {
  68   xbt_dict_set(MSG_host_get_properties(vm), name, value, free_ctn);
  69 }
  70
  71 /** \ingroup msg_vm_management
  72  * \brief Finds a msg_vm_t using its name.
  73  *
  74  * This is a name directory service
  75  * \param name the name of a vm.
  76  * \return the corresponding vm
  77  *
  78  * Please note that a VM is a specific host. Hence, you should give a different name
  79  * for each VM/PM.
  80  */
  81
  82 msg_vm_t MSG_vm_get_by_name(const char *name)
  83 {
  84         return MSG_get_host_by_name(name);
  85 }
  86
  87 /** \ingroup m_vm_management
  88  *
  89  * \brief Return the name of the #msg_host_t.
  90  *
  91  * This functions checks whether \a host is a valid pointer or not and return
  92    its name.
  93  */
  94 const char *MSG_vm_get_name(msg_vm_t vm)
  95 {
  96   return MSG_host_get_name(vm);
  97 }
  98
  99
 100 /* **** Check state of a VM **** */
 101 static inline int __MSG_vm_is_state(msg_vm_t vm, e_surf_vm_state_t state)
 102 {
 103   return simcall_vm_get_state(vm) == state;
 104 }
 105
 106 /** @brief Returns whether the given VM has just reated, not running.
 107  *  @ingroup msg_VMs
 108  */
 109 int MSG_vm_is_created(msg_vm_t vm)
 110 {
 111   return __MSG_vm_is_state(vm, SURF_VM_STATE_CREATED);
 112 }
 113
 114 /** @brief Returns whether the given VM is currently running
 115  *  @ingroup msg_VMs
 116  */
 117 int MSG_vm_is_running(msg_vm_t vm)
 118 {
 119   return __MSG_vm_is_state(vm, SURF_VM_STATE_RUNNING);
 120 }
 121
 122 /** @brief Returns whether the given VM is currently migrating
 123  *  @ingroup msg_VMs
 124  */
 125 int MSG_vm_is_migrating(msg_vm_t vm)
 126 {
 127   return __MSG_vm_is_state(vm, SURF_VM_STATE_MIGRATING);
 128 }
 129
 130 /** @brief Returns whether the given VM is currently suspended, not running.
 131  *  @ingroup msg_VMs
 132  */
 133 int MSG_vm_is_suspended(msg_vm_t vm)
 134 {
 135   return __MSG_vm_is_state(vm, SURF_VM_STATE_SUSPENDED);
 136 }
 137
 138 /** @brief Returns whether the given VM is being saved (FIXME: live saving or not?).
 139  *  @ingroup msg_VMs
 140  */
 141 int MSG_vm_is_saving(msg_vm_t vm)
 142 {
 143   return __MSG_vm_is_state(vm, SURF_VM_STATE_SAVING);
 144 }
 145
 146 /** @brief Returns whether the given VM has been saved, not running.
 147  *  @ingroup msg_VMs
 148  */
 149 int MSG_vm_is_saved(msg_vm_t vm)
 150 {
 151   return __MSG_vm_is_state(vm, SURF_VM_STATE_SAVED);
 152 }
 153
 154 /** @brief Returns whether the given VM is being restored, not running.
 155  *  @ingroup msg_VMs
 156  */
 157 int MSG_vm_is_restoring(msg_vm_t vm)
 158 {
 159   return __MSG_vm_is_state(vm, SURF_VM_STATE_RESTORING);
 160 }
 161
 162
 163
 164 /* ------------------------------------------------------------------------- */
 165 /* ------------------------------------------------------------------------- */
 166
 167 /* **** ******** MSG vm actions ********* **** */
 168
 169 /** @brief Create a new VM with specified parameters.
 170  *  @ingroup msg_VMs*
 171  *  All parameters are in MBytes
 172  *
 173  */
 174 msg_vm_t MSG_vm_create(msg_host_t ind_pm, const char *name,
 175                        int ncpus, int ramsize,
 176                        int net_cap, char *disk_path, int disksize,
 177                        int mig_netspeed, int dp_intensity)
 178 {
 179   /* For the moment, intensity_rate is the percentage against the migration
 180    * bandwidth */
 181   double host_speed = MSG_get_host_speed(ind_pm);
 182   double update_speed = ((double)dp_intensity/100) * mig_netspeed;
 183
 184   msg_vm_t vm = MSG_vm_create_core(ind_pm, name);
 185   s_ws_params_t params;
 186   memset(&params, 0, sizeof(params));
 187   params.ramsize = (sg_size_t)ramsize * 1024 * 1024;
 188   //params.overcommit = 0;
 189   params.devsize = 0;
 190   params.skip_stage2 = 0;
 191   params.max_downtime = 0.03;
 192   params.dp_rate = (update_speed * 1024 * 1024) / host_speed;
 193   params.dp_cap = params.ramsize * 0.9; // assume working set memory is 90% of ramsize
 194   params.mig_speed = (double)mig_netspeed * 1024 * 1024; // mig_speed
 195
 196   //XBT_INFO("dp rate %f migspeed : %f intensity mem : %d, updatespeed %f, hostspeed %f",params.dp_rate, params.mig_speed, dp_intensity, update_speed, host_speed);
 197   simcall_host_set_params(vm, &params);
 198
 199   return vm;
 200 }
 201
 202
 203 /** @brief Create a new VM object. The VM is not yet started. The resource of the VM is allocated upon MSG_vm_start().
 204  *  @ingroup msg_VMs*
 205  *
 206  * A VM is treated as a host. The name of the VM must be unique among all hosts.
 207  */
 208 msg_vm_t MSG_vm_create_core(msg_host_t ind_pm, const char *name)
 209 {
 210   /* make sure the VM of the same name does not exit */
 211   {
 212     void *ind_host_tmp = xbt_lib_get_elm_or_null(host_lib, name);
 213     if (ind_host_tmp) {
 214       XBT_ERROR("host %s already exits", name);
 215       return NULL;
 216     }
 217   }
 218
 219   /* Note: ind_vm and vm_workstation point to the same elm object. */
 220   msg_vm_t ind_vm = NULL;
 221   void *ind_vm_workstation =  NULL;
 222
 223   /* Ask the SIMIX layer to create the surf vm resource */
 224   ind_vm_workstation = simcall_vm_create(name, ind_pm);
 225   ind_vm = (msg_vm_t) __MSG_host_create(ind_vm_workstation);
 226
 227   XBT_DEBUG("A new VM (%s) has been created", name);
 228
 229   #ifdef HAVE_TRACING
 230   TRACE_msg_vm_create(name, ind_pm);
 231   #endif
 232
 233   return ind_vm;
 234 }
 235
 236 /** @brief Destroy a VM. Destroy the VM object from the simulation.
 237  *  @ingroup msg_VMs
 238  */
 239 void MSG_vm_destroy(msg_vm_t vm)
 240 {
 241   /* First, terminate all processes on the VM if necessary */
 242   if (MSG_vm_is_running(vm))
 243       simcall_vm_shutdown(vm);
 244
 245   if (!MSG_vm_is_created(vm)) {
 246     XBT_CRITICAL("shutdown the given VM before destroying it");
 247     DIE_IMPOSSIBLE;
 248   }
 249
 250   /* Then, destroy the VM object */
 251   simcall_vm_destroy(vm);
 252
 253   __MSG_host_destroy(vm);
 254
 255   #ifdef HAVE_TRACING
 256   TRACE_msg_vm_end(vm);
 257   #endif
 258 }
 259
 260
 261 /** @brief Start a vm (i.e., boot the guest operating system)
 262  *  @ingroup msg_VMs
 263  *
 264  *  If the VM cannot be started, an exception is generated.
 265  *
 266  */
 267 void MSG_vm_start(msg_vm_t vm)
 268 {
 269   simcall_vm_start(vm);
 270
 271   #ifdef HAVE_TRACING
 272   TRACE_msg_vm_start(vm);
 273   #endif
 274 }
 275
 276
 277
 278 /** @brief Immediately kills all processes within the given VM. Any memory that they allocated will be leaked.
 279  *  @ingroup msg_VMs
 280  *
 281  * FIXME: No extra delay occurs. If you want to simulate this too, you want to
 282  * use a #MSG_process_sleep() or something. I'm not quite sure.
 283  */
 284 void MSG_vm_shutdown(msg_vm_t vm)
 285 {
 286   /* msg_vm_t equals to msg_host_t */
 287   simcall_vm_shutdown(vm);
 288
 289   // #ifdef HAVE_TRACING
 290   // TRACE_msg_vm_(vm);
 291   // #endif
 292 }
 293
 294
 295
 296 /* We have two mailboxes. mbox is used to transfer migration data between
 297  * source and destiantion PMs. mbox_ctl is used to detect the completion of a
 298  * migration. The names of these mailboxes must not conflict with others. */
 299 static inline char *get_mig_mbox_src_dst(const char *vm_name, const char *src_pm_name, const char *dst_pm_name)
 300 {
 301   return bprintf("__mbox_mig_src_dst:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name);
 302 }
 303
 304 static inline char *get_mig_mbox_ctl(const char *vm_name, const char *src_pm_name, const char *dst_pm_name)
 305 {
 306   return bprintf("__mbox_mig_ctl:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name);
 307 }
 308
 309 static inline char *get_mig_process_tx_name(const char *vm_name, const char *src_pm_name, const char *dst_pm_name)
 310 {
 311   return bprintf("__pr_mig_tx:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name);
 312 }
 313
 314 static inline char *get_mig_process_rx_name(const char *vm_name, const char *src_pm_name, const char *dst_pm_name)
 315 {
 316   return bprintf("__pr_mig_rx:%s(%s-%s)", vm_name, src_pm_name, dst_pm_name);
 317 }
 318
 319 static inline char *get_mig_task_name(const char *vm_name, const char *src_pm_name, const char *dst_pm_name, int stage)
 320 {
 321   return bprintf("__task_mig_stage%d:%s(%s-%s)", stage, vm_name, src_pm_name, dst_pm_name);
 322 }
 323
 324 static void launch_deferred_exec_process(msg_host_t host, double computation, double prio);
 325
 326 static int migration_rx_fun(int argc, char *argv[])
 327 {
 328   XBT_DEBUG("mig: rx_start");
 329
 330   xbt_assert(argc == 4);
 331   const char *vm_name = argv[1];
 332   const char *src_pm_name  = argv[2];
 333   const char *dst_pm_name  = argv[3];
 334   msg_vm_t vm = MSG_get_host_by_name(vm_name);
 335   msg_host_t src_pm = MSG_get_host_by_name(src_pm_name);
 336   msg_host_t dst_pm = MSG_get_host_by_name(dst_pm_name);
 337
 338
 339   s_ws_params_t params;
 340   simcall_host_get_params(vm, &params);
 341   const double xfer_cpu_overhead = params.xfer_cpu_overhead;
 342
 343
 344   int need_exit = 0;
 345
 346   char *mbox = get_mig_mbox_src_dst(vm_name, src_pm_name, dst_pm_name);
 347   char *mbox_ctl = get_mig_mbox_ctl(vm_name, src_pm_name, dst_pm_name);
 348   char *finalize_task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, 3);
 349
 350   for (;;) {
 351     msg_task_t task = NULL;
 352     MSG_task_recv(&task, mbox);
 353     {
 354       double received = MSG_task_get_data_size(task);
 355       /* TODO: clean up */
 356       // const double alpha = 0.22L * 1.0E8 / (80L * 1024 * 1024);
 357       launch_deferred_exec_process(vm, received * xfer_cpu_overhead, 1);
 358     }
 359
 360     if (strcmp(task->name, finalize_task_name) == 0)
 361       need_exit = 1;
 362
 363     MSG_task_destroy(task);
 364
 365     if (need_exit)
 366       break;
 367   }
 368
 369
 370   /* deinstall the current affinity setting */
 371   simcall_vm_set_affinity(vm, src_pm, 0);
 372
 373   simcall_vm_migrate(vm, dst_pm);
 374   simcall_vm_resume(vm);
 375
 376   /* install the affinity setting of the VM on the destination pm */
 377   {
 378     msg_host_priv_t priv = msg_host_resource_priv(vm);
 379
 380     unsigned long affinity_mask = (unsigned long) xbt_dict_get_or_null_ext(priv->affinity_mask_db, (char *) dst_pm, sizeof(msg_host_t));
 381     simcall_vm_set_affinity(vm, dst_pm, affinity_mask);
 382     XBT_INFO("set affinity(0x%04lx@%s) for %s", affinity_mask, MSG_host_get_name(dst_pm), MSG_host_get_name(vm));
 383   }
 384
 385   {
 386     char *task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, 4);
 387
 388     msg_task_t task = MSG_task_create(task_name, 0, 0, NULL);
 389     msg_error_t ret = MSG_task_send(task, mbox_ctl);
 390     xbt_assert(ret == MSG_OK);
 391
 392     xbt_free(task_name);
 393   }
 394
 395
 396   xbt_free(mbox);
 397   xbt_free(mbox_ctl);
 398   xbt_free(finalize_task_name);
 399
 400   XBT_DEBUG("mig: rx_done");
 401
 402   return 0;
 403 }
 404
 405 static void reset_dirty_pages(msg_vm_t vm)
 406 {
 407   msg_host_priv_t priv = msg_host_resource_priv(vm);
 408
 409   char *key = NULL;
 410   xbt_dict_cursor_t cursor = NULL;
 411   dirty_page_t dp = NULL;
 412   xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
 413     double remaining = MSG_task_get_remaining_computation(dp->task);
 414     dp->prev_clock = MSG_get_clock();
 415     dp->prev_remaining = remaining;
 416
 417     // XBT_INFO("%s@%s remaining %f", key, sg_host_name(vm), remaining);
 418   }
 419 }
 420
 421 static void start_dirty_page_tracking(msg_vm_t vm)
 422 {
 423   msg_host_priv_t priv = msg_host_resource_priv(vm);
 424   priv->dp_enabled = 1;
 425
 426   reset_dirty_pages(vm);
 427 }
 428
 429 static void stop_dirty_page_tracking(msg_vm_t vm)
 430 {
 431   msg_host_priv_t priv = msg_host_resource_priv(vm);
 432   priv->dp_enabled = 0;
 433 }
 434
#if 0
/* Disabled code (compiled out by the enclosing #if 0), kept for reference.
 *
 * It might be natural that we define dp_rate for each task. But, we will also
 * have to care about how each task behavior affects the memory update behavior
 * at the operating system level. It may not be easy to model it with a simple algorithm.
 *
 * Returns the per-task dp_rate multiplied by the amount computed since the
 * previous sample (prev_remaining - remaining), and logs the figures. */
double calc_updated_pages(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock)
{
    double computed = dp->prev_remaining - remaining;
    double duration = clock - dp->prev_clock;
    double updated = dp->task->dp_rate * computed;

    XBT_INFO("%s@%s: computated %f ops (remaining %f -> %f) in %f secs (%f -> %f)",
        key, sg_host_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock);
    XBT_INFO("%s@%s: updated %f bytes, %f Mbytes/s",
        key, sg_host_name(vm), updated, updated / duration / 1000 / 1000);

    return updated;
}
#endif
 453
 454 static double get_computed(char *key, msg_vm_t vm, dirty_page_t dp, double remaining, double clock)
 455 {
 456   double computed = dp->prev_remaining - remaining;
 457   double duration = clock - dp->prev_clock;
 458
 459   XBT_DEBUG("%s@%s: computated %f ops (remaining %f -> %f) in %f secs (%f -> %f)",
 460       key, sg_host_name(vm), computed, dp->prev_remaining, remaining, duration, dp->prev_clock, clock);
 461
 462   return computed;
 463 }
 464
 465 static double lookup_computed_flop_counts(msg_vm_t vm, int stage_for_fancy_debug, int stage2_round_for_fancy_debug)
 466 {
 467   msg_host_priv_t priv = msg_host_resource_priv(vm);
 468   double total = 0;
 469
 470   char *key = NULL;
 471   xbt_dict_cursor_t cursor = NULL;
 472   dirty_page_t dp = NULL;
 473   xbt_dict_foreach(priv->dp_objs, cursor, key, dp) {
 474     double remaining = MSG_task_get_remaining_computation(dp->task);
 475
 476          double clock = MSG_get_clock();
 477
 478     // total += calc_updated_pages(key, vm, dp, remaining, clock);
 479     total += get_computed(key, vm, dp, remaining, clock);
 480
 481     dp->prev_remaining = remaining;
 482     dp->prev_clock = clock;
 483   }
 484
 485   total += priv->dp_updated_by_deleted_tasks;
 486
 487   XBT_DEBUG("mig-stage%d.%d: computed %f flop_counts (including %f by deleted tasks)",
 488       stage_for_fancy_debug,
 489       stage2_round_for_fancy_debug,
 490       total, priv->dp_updated_by_deleted_tasks);
 491
 492
 493
 494   priv->dp_updated_by_deleted_tasks = 0;
 495
 496
 497   return total;
 498 }
 499
 500 // TODO Is this code redundant with the information provided by
 501 // msg_process_t MSG_process_create(const char *name, xbt_main_func_t code, void *data, msg_host_t host)
 502 void MSG_host_add_task(msg_host_t host, msg_task_t task)
 503 {
 504   msg_host_priv_t priv = msg_host_resource_priv(host);
 505   double remaining = MSG_task_get_remaining_computation(task);
 506   char *key = bprintf("%s-%lld", task->name, task->counter);
 507
 508   dirty_page_t dp = xbt_new0(s_dirty_page, 1);
 509   dp->task = task;
 510
 511   /* It should be okay that we add a task onto a migrating VM. */
 512   if (priv->dp_enabled) {
 513     dp->prev_clock = MSG_get_clock();
 514     dp->prev_remaining = remaining;
 515   }
 516
 517   xbt_assert(xbt_dict_get_or_null(priv->dp_objs, key) == NULL);
 518   xbt_dict_set(priv->dp_objs, key, dp, NULL);
 519   XBT_DEBUG("add %s on %s (remaining %f, dp_enabled %d)", key, sg_host_name(host), remaining, priv->dp_enabled);
 520
 521   xbt_free(key);
 522 }
 523
 524 void MSG_host_del_task(msg_host_t host, msg_task_t task)
 525 {
 526   msg_host_priv_t priv = msg_host_resource_priv(host);
 527
 528   char *key = bprintf("%s-%lld", task->name, task->counter);
 529
 530   dirty_page_t dp = xbt_dict_get_or_null(priv->dp_objs, key);
 531   xbt_assert(dp->task == task);
 532
 533   /* If we are in the middle of dirty page tracking, we record how much
 534    * computaion has been done until now, and keep the information for the
 535    * lookup_() function that will called soon. */
 536   if (priv->dp_enabled) {
 537     double remaining = MSG_task_get_remaining_computation(task);
 538     double clock = MSG_get_clock();
 539     // double updated = calc_updated_pages(key, host, dp, remaining, clock);
 540     double updated = get_computed(key, host, dp, remaining, clock);
 541
 542     priv->dp_updated_by_deleted_tasks += updated;
 543   }
 544
 545   xbt_dict_remove(priv->dp_objs, key);
 546   xbt_free(dp);
 547
 548   XBT_DEBUG("del %s on %s", key, sg_host_name(host));
 549
 550   xbt_free(key);
 551 }
 552
 553
 554 static int deferred_exec_fun(int argc, char *argv[])
 555 {
 556   xbt_assert(argc == 3);
 557   const char *comp_str = argv[1];
 558   double computaion = atof(comp_str);
 559   const char *prio_str = argv[2];
 560   double prio = atof(prio_str);
 561
 562   msg_task_t task = MSG_task_create("__task_deferred", computaion, 0, NULL);
 563   // XBT_INFO("exec deferred %f", computaion);
 564
 565   /* dpt is the results of the VM activity */
 566   MSG_task_set_priority(task, prio);
 567   MSG_task_execute(task);
 568
 569
 570
 571   MSG_task_destroy(task);
 572
 573   return 0;
 574 }
 575
 576 static void launch_deferred_exec_process(msg_host_t host, double computation, double prio)
 577 {
 578   char *pr_name = bprintf("__pr_deferred_exec_%s", MSG_host_get_name(host));
 579
 580   int nargvs = 4;
 581   char **argv = xbt_new(char *, nargvs);
 582   argv[0] = pr_name;
 583   argv[1] = bprintf("%f", computation);
 584   argv[2] = bprintf("%f", prio);
 585   argv[3] = NULL;
 586
 587   MSG_process_create_with_arguments(pr_name, deferred_exec_fun, NULL, host, nargvs - 1, argv);
 588 }
 589
 590
 591 static int task_tx_overhead_fun(int argc, char *argv[])
 592 {
 593   xbt_assert(argc == 2);
 594   const char *mbox = argv[1];
 595
 596   int need_exit = 0;
 597
 598   // XBT_INFO("start %s", mbox);
 599
 600   for (;;) {
 601     msg_task_t task = NULL;
 602     MSG_task_recv(&task, mbox);
 603
 604     // XBT_INFO("task->name %s", task->name);
 605
 606     if (strcmp(task->name, "finalize_making_overhead") == 0)
 607       need_exit = 1;
 608
 609     // XBT_INFO("exec");
 610     // MSG_task_set_priority(task, 1000000);
 611     MSG_task_execute(task);
 612     MSG_task_destroy(task);
 613
 614     if (need_exit)
 615       break;
 616   }
 617
 618   // XBT_INFO("bye");
 619
 620   return 0;
 621 }
 622
 623 static void start_overhead_process(msg_task_t comm_task)
 624 {
 625   char *pr_name = bprintf("__pr_task_tx_overhead_%s", MSG_task_get_name(comm_task));
 626   char *mbox    = bprintf("__mb_task_tx_overhead_%s", MSG_task_get_name(comm_task));
 627
 628   int nargvs = 3;
 629   char **argv = xbt_new(char *, nargvs);
 630   argv[0] = pr_name;
 631   argv[1] = mbox;
 632   argv[2] = NULL;
 633
 634   // XBT_INFO("micro start: mbox %s", mbox);
 635   MSG_process_create_with_arguments(pr_name, task_tx_overhead_fun, NULL, MSG_host_self(), nargvs - 1, argv);
 636 }
 637
 638 static void shutdown_overhead_process(msg_task_t comm_task)
 639 {
 640   char *mbox = bprintf("__mb_task_tx_overhead_%s", MSG_task_get_name(comm_task));
 641
 642   msg_task_t task = MSG_task_create("finalize_making_overhead", 0, 0, NULL);
 643
 644   // XBT_INFO("micro shutdown: mbox %s", mbox);
 645   msg_error_t ret = MSG_task_send(task, mbox);
 646   xbt_assert(ret == MSG_OK);
 647
 648   xbt_free(mbox);
 649   // XBT_INFO("shutdown done");
 650 }
 651
 652 static void request_overhead(msg_task_t comm_task, double computation)
 653 {
 654   char *mbox = bprintf("__mb_task_tx_overhead_%s", MSG_task_get_name(comm_task));
 655
 656   msg_task_t task = MSG_task_create("micro", computation, 0, NULL);
 657
 658   // XBT_INFO("req overhead");
 659   msg_error_t ret = MSG_task_send(task, mbox);
 660   xbt_assert(ret == MSG_OK);
 661
 662   xbt_free(mbox);
 663 }
 664
 665 /* alpha is (floating_operations / bytes).
 666  *
 667  * When actual migration traffic was 32 mbytes/s, we observed the CPU
 668  * utilization of the main thread of the Qemu process was 10 %.
 669  *   alpha = 0.1 * C / (32 * 1024 * 1024)
 670  * where the CPU capacity of the PM is C flops/s.
 671  *
 672  * */
 673 static void task_send_bounded_with_cpu_overhead(msg_task_t comm_task, char *mbox, double mig_speed, double alpha)
 674 {
 675   const double chunk_size = 1024 * 1024 * 10;
 676   double remaining = MSG_task_get_data_size(comm_task);
 677
 678   start_overhead_process(comm_task);
 679
 680
 681   while (remaining > 0) {
 682     double data_size = chunk_size;
 683     if (remaining < chunk_size)
 684       data_size = remaining;
 685
 686     remaining -= data_size;
 687
 688     // XBT_INFO("remaining %f bytes", remaining);
 689
 690
 691     double clock_sta = MSG_get_clock();
 692
 693     /* create a micro task */
 694     {
 695       char *mtask_name = bprintf("__micro_%s", MSG_task_get_name(comm_task));
 696       msg_task_t mtask = MSG_task_create(mtask_name, 0, data_size, NULL);
 697
 698       request_overhead(comm_task, data_size * alpha);
 699
 700       msg_error_t ret = MSG_task_send(mtask, mbox);
 701       xbt_assert(ret == MSG_OK);
 702
 703       xbt_free(mtask_name);
 704     }
 705
 706 #if 0
 707     {
 708       /* In the real world, sending data involves small CPU computation. */
 709       char *mtask_name = bprintf("__micro_%s", MSG_task_get_name(comm_task));
 710       msg_task_t mtask = MSG_task_create(mtask_name, data_size * alpha, data_size, NULL);
 711       MSG_task_execute(mtask);
 712       MSG_task_destroy(mtask);
 713       xbt_free(mtask_name);
 714     }
 715 #endif
 716
 717     /* TODO */
 718
 719     double clock_end = MSG_get_clock();
 720
 721
 722     if (mig_speed > 0) {
 723       /*
 724        * (max bandwidth) > data_size / ((elapsed time) + time_to_sleep)
 725        *
 726        * Thus, we get
 727        *   time_to_sleep > data_size / (max bandwidth) - (elapsed time)
 728        *
 729        * If time_to_sleep is smaller than zero, the elapsed time was too big. We
 730        * do not need a micro sleep.
 731        **/
 732       double time_to_sleep = data_size / mig_speed - (clock_end - clock_sta);
 733       if (time_to_sleep > 0)
 734         MSG_process_sleep(time_to_sleep);
 735
 736
 737       //XBT_INFO("duration %f", clock_end - clock_sta);
 738       //XBT_INFO("time_to_sleep %f", time_to_sleep);
 739     }
 740   }
 741
 742   // XBT_INFO("%s", MSG_task_get_name(comm_task));
 743   shutdown_overhead_process(comm_task);
 744
 745 }
 746
 747
#if 0
/* Disabled code (compiled out by the enclosing #if 0), kept for reference.
 *
 * Sketch of a loop that polls the remaining communication of comm_task and
 * executes a computation task proportional to the amount sent since the
 * previous iteration.
 *
 * NOTE(review): as written this would not compile if re-enabled: need_exit
 * and overhead are not declared in this block, and comp_task_name is declared
 * inside the loop but freed after it (so it is also never freed per
 * iteration). */
static void make_cpu_overhead_of_data_transfer(msg_task_t comm_task, double init_comm_size)
{
  double prev_remaining = init_comm_size;

  for (;;) {
    double remaining = MSG_task_get_remaining_communication(comm_task);
    if (remaining == 0)
      need_exit = 1;

    /* amount transferred since the previous iteration */
    double sent = prev_remaining - remaining;
    double comp_size = sent * overhead;


    char *comp_task_name = bprintf("__sender_overhead%s", MSG_task_get_name(comm_task));
    msg_task_t comp_task = MSG_task_create(comp_task_name, comp_size, 0, NULL);
    MSG_task_execute(comp_task);
    MSG_task_destroy(comp_task);

    if (need_exit)
      break;

    prev_remaining = remaining;

  }

  xbt_free(comp_task_name);
}
#endif
 777
 778 // #define USE_MICRO_TASK 1
 779
#if 0
/* Disabled code (compiled out by the enclosing #if 0): candidate values that
 * were tried for alpha, the CPU cost of a transfer in flops per byte. Each
 * has the form (CPU utilization) * 1.0E8 / (bytes per second). */
// const double alpha = 0.1L * 1.0E8 / (32L * 1024 * 1024);
// const double alpha = 0.25L * 1.0E8 / (85L * 1024 * 1024);
// const double alpha = 0.20L * 1.0E8 / (85L * 1024 * 1024);
// const double alpha = 0.25L * 1.0E8 / (85L * 1024 * 1024);
// const double alpha = 0.32L * 1.0E8 / (24L * 1024 * 1024);   // makes super good values for 32 mbytes/s
//const double alpha = 0.32L * 1.0E8 / (32L * 1024 * 1024);
// const double alpha = 0.56L * 1.0E8 / (80L * 1024 * 1024);
////const double alpha = 0.20L * 1.0E8 / (80L * 1024 * 1024);
// const double alpha = 0.56L * 1.0E8 / (90L * 1024 * 1024);
// const double alpha = 0.66L * 1.0E8 / (90L * 1024 * 1024);
// const double alpha = 0.20L * 1.0E8 / (80L * 1024 * 1024);

/* CPU 22% when 80Mbyte/s */
const double alpha = 0.22L * 1.0E8 / (80L * 1024 * 1024);
#endif
 796
 797
 798 static void send_migration_data(const char *vm_name, const char *src_pm_name, const char *dst_pm_name,
 799     sg_size_t size, char *mbox, int stage, int stage2_round, double mig_speed, double xfer_cpu_overhead)
 800 {
 801   char *task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, stage);
 802   msg_task_t task = MSG_task_create(task_name, 0, size, NULL);
 803
 804   /* TODO: clean up */
 805
 806   double clock_sta = MSG_get_clock();
 807
 808 #ifdef USE_MICRO_TASK
 809
 810   task_send_bounded_with_cpu_overhead(task, mbox, mig_speed, xfer_cpu_overhead);
 811
 812 #else
 813   msg_error_t ret;
 814   if (mig_speed > 0)
 815     ret = MSG_task_send_bounded(task, mbox, mig_speed);
 816   else
 817     ret = MSG_task_send(task, mbox);
 818   xbt_assert(ret == MSG_OK);
 819 #endif
 820
 821   double clock_end = MSG_get_clock();
 822   double duration = clock_end - clock_sta;
 823   double actual_speed = size / duration;
 824 #ifdef USE_MICRO_TASK
 825   double cpu_utilization = size * xfer_cpu_overhead / duration / 1.0E8;
 826 #else
 827   double cpu_utilization = 0;
 828 #endif
 829
 830
 831
 832
 833   if (stage == 2){
 834     XBT_DEBUG("mig-stage%d.%d: sent %llu duration %f actual_speed %f (target %f) cpu %f", stage, stage2_round, size, duration, actual_speed, mig_speed, cpu_utilization);}
 835   else{
 836     XBT_DEBUG("mig-stage%d: sent %llu duration %f actual_speed %f (target %f) cpu %f", stage, size, duration, actual_speed, mig_speed, cpu_utilization);
 837   }
 838
 839   xbt_free(task_name);
 840
 841
 842
 843 #ifdef USE_MICRO_TASK
 844   /* The name of a micro task starts with __micro, which does not match the
 845    * special name that finalizes the receiver loop. Thus, we send the special task.
 846    **/
 847   {
 848     if (stage == 3) {
 849       char *task_name = get_mig_task_name(vm_name, src_pm_name, dst_pm_name, stage);
 850       msg_task_t task = MSG_task_create(task_name, 0, 0, NULL);
 851       msg_error_t ret = MSG_task_send(task, mbox);
 852       xbt_assert(ret == MSG_OK);
 853       xbt_free(task_name);
 854     }
 855   }
 856 #endif
 857 }
 858
 859 static double get_updated_size(double computed, double dp_rate, double dp_cap)
 860 {
 861   double updated_size = computed * dp_rate;
 862   XBT_DEBUG("updated_size %f dp_rate %f", updated_size, dp_rate);
 863   if (updated_size > dp_cap) {
 864     // XBT_INFO("mig-stage2.%d: %f bytes updated, but cap it with the working set size %f", stage2_round, updated_size, dp_cap);
 865     updated_size = dp_cap;
 866   }
 867
 868   return updated_size;
 869 }
 870
 871 static double send_stage1(msg_host_t vm, const char *src_pm_name, const char *dst_pm_name,
 872     sg_size_t ramsize, double mig_speed, double xfer_cpu_overhead, double dp_rate, double dp_cap, double dpt_cpu_overhead)
 873 {
 874   const char *vm_name = MSG_host_get_name(vm);
 875   char *mbox = get_mig_mbox_src_dst(vm_name, src_pm_name, dst_pm_name);
 876
 877   // const long chunksize = (sg_size_t)1024 * 1024 * 100;
 878   const sg_size_t chunksize = (sg_size_t)1024 * 1024 * 100000;
 879   sg_size_t remaining = ramsize;
 880   double computed_total = 0;
 881
 882   while (remaining > 0) {
 883     sg_size_t datasize = chunksize;
 884     if (remaining < chunksize)
 885       datasize = remaining;
 886
 887     remaining -= datasize;
 888
 889     send_migration_data(vm_name, src_pm_name, dst_pm_name, datasize, mbox, 1, 0, mig_speed, xfer_cpu_overhead);
 890     double computed = lookup_computed_flop_counts(vm, 1, 0);
 891     computed_total += computed;
 892
 893     // {
 894     //   double updated_size = get_updated_size(computed, dp_rate, dp_cap);
 895
 896     //   double overhead = dpt_cpu_overhead * updated_size;
 897     //   launch_deferred_exec_process(vm, overhead, 10000);
 898     // }
 899   }
 900   xbt_free(mbox);
 901   return computed_total;
 902 }
 903
 904
 905
 906 static double get_threshold_value(double bandwidth, double max_downtime)
 907 {
 908   /* This value assumes the network link is 1Gbps. */
 909   // double threshold = max_downtime * 125 * 1024 * 1024;
 910   double threshold = max_downtime * bandwidth;
 911
 912   return threshold;
 913 }
 914
 915 static int migration_tx_fun(int argc, char *argv[])
 916 {
 917   XBT_DEBUG("mig: tx_start");
 918
 919   xbt_assert(argc == 4);
 920   const char *vm_name = argv[1];
 921   const char *src_pm_name  = argv[2];
 922   const char *dst_pm_name  = argv[3];
 923   msg_vm_t vm = MSG_get_host_by_name(vm_name);
 924
 925
 926   s_ws_params_t params;
 927   simcall_host_get_params(vm, &params);
 928   const sg_size_t ramsize   = params.ramsize;
 929   const sg_size_t devsize   = params.devsize;
 930   const int skip_stage1     = params.skip_stage1;
 931   const int skip_stage2     = params.skip_stage2;
 932   const double dp_rate      = params.dp_rate;
 933   const double dp_cap       = params.dp_cap;
 934   const double mig_speed    = params.mig_speed;
 935   const double xfer_cpu_overhead = params.xfer_cpu_overhead;
 936   const double dpt_cpu_overhead = params.dpt_cpu_overhead;
 937
 938   double remaining_size = ramsize + devsize;
 939
 940   double max_downtime = params.max_downtime;
 941   if (max_downtime == 0) {
 942     XBT_WARN("use the default max_downtime value 30ms");
 943     max_downtime = 0.03;
 944   }
 945
 946   double threshold = 0.00001; /* TODO: cleanup */
 947
 948   /* setting up parameters has done */
 949
 950
 951   if (ramsize == 0)
 952     XBT_WARN("migrate a VM, but ramsize is zero");
 953
 954   char *mbox = get_mig_mbox_src_dst(vm_name, src_pm_name, dst_pm_name);
 955
 956   XBT_INFO("mig-stage1: remaining_size %f", remaining_size);
 957
 958   /* Stage1: send all memory pages to the destination. */
 959   start_dirty_page_tracking(vm);
 960
 961   double computed_during_stage1 = 0;
 962   if (!skip_stage1) {
 963     // send_migration_data(vm_name, src_pm_name, dst_pm_name, ramsize, mbox, 1, 0, mig_speed, xfer_cpu_overhead);
 964
 965     /* send ramsize, but split it */
 966     double clock_prev_send = MSG_get_clock();
 967
 968     computed_during_stage1 = send_stage1(vm, src_pm_name, dst_pm_name, ramsize, mig_speed, xfer_cpu_overhead, dp_rate, dp_cap, dpt_cpu_overhead);
 969     remaining_size -= ramsize;
 970
 971     double clock_post_send = MSG_get_clock();
 972     double bandwidth = ramsize / (clock_post_send - clock_prev_send);
 973     threshold = get_threshold_value(bandwidth, max_downtime);
 974     XBT_INFO("actual banwdidth %f (MB/s), threshold %f", bandwidth / 1024 / 1024, threshold);
 975   }
 976
 977
 978   /* Stage2: send update pages iteratively until the size of remaining states
 979    * becomes smaller than the threshold value. */
 980   if (skip_stage2)
 981     goto stage3;
 982   if (max_downtime == 0) {
 983     XBT_WARN("no max_downtime parameter, skip stage2");
 984     goto stage3;
 985   }
 986
 987
 988   int stage2_round = 0;
 989   for (;;) {
 990
 991     double updated_size = 0;
 992     if (stage2_round == 0)  {
 993       /* just after stage1, nothing has been updated. But, we have to send the data updated during stage1 */
 994       updated_size = get_updated_size(computed_during_stage1, dp_rate, dp_cap);
 995     } else {
 996       double computed = lookup_computed_flop_counts(vm, 2, stage2_round);
 997       updated_size = get_updated_size(computed, dp_rate, dp_cap);
 998     }
 999
1000     XBT_INFO("mig-stage 2:%d updated_size %f computed_during_stage1 %f dp_rate %f dp_cap %f",
1001         stage2_round, updated_size, computed_during_stage1, dp_rate, dp_cap);
1002
1003
1004     // if (stage2_round != 0) {
1005     //   /* during stage1, we have already created overhead tasks */
1006     //   double overhead = dpt_cpu_overhead * updated_size;
1007     //   XBT_DEBUG("updated %f overhead %f", updated_size, overhead);
1008     //   launch_deferred_exec_process(vm, overhead, 10000);
1009     // }
1010
1011
1012     {
1013       remaining_size += updated_size;
1014
1015       XBT_INFO("mig-stage2.%d: remaining_size %f (%s threshold %f)", stage2_round,
1016           remaining_size, (remaining_size < threshold) ? "<" : ">", threshold);
1017
1018       if (remaining_size < threshold)
1019         break;
1020     }
1021
1022     double clock_prev_send = MSG_get_clock();
1023
1024     send_migration_data(vm_name, src_pm_name, dst_pm_name, updated_size, mbox, 2, stage2_round, mig_speed, xfer_cpu_overhead);
1025
1026     double clock_post_send = MSG_get_clock();
1027
1028     double bandwidth = updated_size / (clock_post_send - clock_prev_send);
1029     threshold = get_threshold_value(bandwidth, max_downtime);
1030     XBT_INFO("actual banwdidth %f, threshold %f", bandwidth / 1024 / 1024, threshold);
1031
1032
1033
1034
1035
1036
1037
1038     remaining_size -= updated_size;
1039     stage2_round += 1;
1040   }
1041
1042
1043 stage3:
1044   /* Stage3: stop the VM and copy the rest of states. */
1045   XBT_INFO("mig-stage3: remaining_size %f", remaining_size);
1046   simcall_vm_suspend(vm);
1047   stop_dirty_page_tracking(vm);
1048
1049   send_migration_data(vm_name, src_pm_name, dst_pm_name, remaining_size, mbox, 3, 0, mig_speed, xfer_cpu_overhead);
1050
1051   xbt_free(mbox);
1052
1053   XBT_DEBUG("mig: tx_done");
1054
1055   return 0;
1056 }
1057
1058
1059
1060 static void do_migration(msg_vm_t vm, msg_host_t src_pm, msg_host_t dst_pm)
1061 {
1062   char *mbox_ctl = get_mig_mbox_ctl(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm));
1063
1064   {
1065     char *pr_name = get_mig_process_rx_name(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm));
1066     int nargvs = 5;
1067     char **argv = xbt_new(char *, nargvs);
1068     argv[0] = pr_name;
1069     argv[1] = xbt_strdup(sg_host_name(vm));
1070     argv[2] = xbt_strdup(sg_host_name(src_pm));
1071     argv[3] = xbt_strdup(sg_host_name(dst_pm));
1072     argv[4] = NULL;
1073
1074     MSG_process_create_with_arguments(pr_name, migration_rx_fun, NULL, dst_pm, nargvs - 1, argv);
1075   }
1076
1077   {
1078     char *pr_name = get_mig_process_tx_name(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm));
1079     int nargvs = 5;
1080     char **argv = xbt_new(char *, nargvs);
1081     argv[0] = pr_name;
1082     argv[1] = xbt_strdup(sg_host_name(vm));
1083     argv[2] = xbt_strdup(sg_host_name(src_pm));
1084     argv[3] = xbt_strdup(sg_host_name(dst_pm));
1085     argv[4] = NULL;
1086     MSG_process_create_with_arguments(pr_name, migration_tx_fun, NULL, src_pm, nargvs - 1, argv);
1087   }
1088
1089   /* wait until the migration have finished */
1090   {
1091     msg_task_t task = NULL;
1092     msg_error_t ret = MSG_task_recv(&task, mbox_ctl);
1093
1094     xbt_assert(ret == MSG_OK);
1095
1096     char *expected_task_name = get_mig_task_name(sg_host_name(vm), sg_host_name(src_pm), sg_host_name(dst_pm), 4);
1097     xbt_assert(strcmp(task->name, expected_task_name) == 0);
1098     xbt_free(expected_task_name);
1099     MSG_task_destroy(task);
1100   }
1101
1102   xbt_free(mbox_ctl);
1103 }
1104
1105
1106 /** @brief Migrate the VM to the given host.
1107  *  @ingroup msg_VMs
1108  *
1109  * FIXME: No migration cost occurs. If you want to simulate this too, you want to use a
1110  * MSG_task_send() before or after, depending on whether you want to do cold or hot
1111  * migration.
1112  */
1113 void MSG_vm_migrate(msg_vm_t vm, msg_host_t new_pm)
1114 {
1115   /* some thoughts:
1116    * - One approach is ...
1117    *   We first create a new VM (i.e., destination VM) on the destination
1118    *   physical host. The destination VM will receive the state of the source
1119    *   VM over network. We will finally destroy the source VM.
1120    *   - This behavior is similar to the way of migration in the real world.
1121    *     Even before a migration is completed, we will see a destination VM,
1122    *     consuming resources.
1123    *   - We have to relocate all processes. The existing process migraion code
1124    *     will work for this?
1125    *   - The name of the VM is a somewhat unique ID in the code. It is tricky
1126    *     for the destination VM?
1127    *
1128    * - Another one is ...
1129    *   We update the information of the given VM to place it to the destination
1130    *   physical host.
1131    *
1132    * The second one would be easier.
1133    *
1134    */
1135
1136   msg_host_t old_pm = simcall_vm_get_pm(vm);
1137
1138   if (simcall_vm_get_state(vm) != SURF_VM_STATE_RUNNING)
1139     THROWF(vm_error, 0, "VM(%s) is not running", sg_host_name(vm));
1140
1141   do_migration(vm, old_pm, new_pm);
1142
1143
1144
1145   XBT_DEBUG("VM(%s) moved from PM(%s) to PM(%s)", vm->key, old_pm->key, new_pm->key);
1146
1147   #ifdef HAVE_TRACING
1148   TRACE_msg_vm_change_host(vm, old_pm, new_pm);
1149   #endif
1150 }
1151
1152
1153 /** @brief Immediately suspend the execution of all processes within the given VM.
1154  *  @ingroup msg_VMs
1155  *
1156  * This function stops the exection of the VM. All the processes on this VM
1157  * will pause. The state of the VM is perserved. We can later resume it again.
1158  *
1159  * No suspension cost occurs.
1160  */
1161 void MSG_vm_suspend(msg_vm_t vm)
1162 {
1163   simcall_vm_suspend(vm);
1164
1165   XBT_DEBUG("vm_suspend done");
1166
1167   #ifdef HAVE_TRACING
1168   TRACE_msg_vm_suspend(vm);
1169   #endif
1170 }
1171
1172
1173 /** @brief Resume the execution of the VM. All processes on the VM run again.
1174  *  @ingroup msg_VMs
1175  *
1176  * No resume cost occurs.
1177  */
1178 void MSG_vm_resume(msg_vm_t vm)
1179 {
1180   simcall_vm_resume(vm);
1181
1182   #ifdef HAVE_TRACING
1183   TRACE_msg_vm_resume(vm);
1184   #endif
1185 }
1186
1187
1188 /** @brief Immediately save the execution of all processes within the given VM.
1189  *  @ingroup msg_VMs
1190  *
1191  * This function stops the exection of the VM. All the processes on this VM
1192  * will pause. The state of the VM is perserved. We can later resume it again.
1193  *
1194  * FIXME: No suspension cost occurs. If you want to simulate this too, you want to
1195  * use a \ref MSG_file_write() before or after, depending on the exact semantic
1196  * of VM save to you.
1197  */
1198 void MSG_vm_save(msg_vm_t vm)
1199 {
1200   simcall_vm_save(vm);
1201   #ifdef HAVE_TRACING
1202   TRACE_msg_vm_save(vm);
1203   #endif
1204 }
1205
1206 /** @brief Restore the execution of the VM. All processes on the VM run again.
1207  *  @ingroup msg_VMs
1208  *
1209  * FIXME: No restore cost occurs. If you want to simulate this too, you want to
1210  * use a \ref MSG_file_read() before or after, depending on the exact semantic
1211  * of VM restore to you.
1212  */
1213 void MSG_vm_restore(msg_vm_t vm)
1214 {
1215   simcall_vm_restore(vm);
1216
1217   #ifdef HAVE_TRACING
1218   TRACE_msg_vm_restore(vm);
1219   #endif
1220 }
1221
1222
1223 /** @brief Get the physical host of a given VM.
1224  *  @ingroup msg_VMs
1225  */
1226 msg_host_t MSG_vm_get_pm(msg_vm_t vm)
1227 {
1228   return simcall_vm_get_pm(vm);
1229 }
1230
1231
1232 /** @brief Set a CPU bound for a given VM.
1233  *  @ingroup msg_VMs
1234  *
1235  * 1.
1236  * Note that in some cases MSG_task_set_bound() may not intuitively work for VMs.
1237  *
1238  * For example,
1239  *  On PM0, there are Task1 and VM0.
1240  *  On VM0, there is Task2.
1241  * Now we bound 75% to Task1\@PM0 and bound 25% to Task2\@VM0.
1242  * Then,
1243  *  Task1\@PM0 gets 50%.
1244  *  Task2\@VM0 gets 25%.
1245  * This is NOT 75% for Task1\@PM0 and 25% for Task2\@VM0, respectively.
1246  *
1247  * This is because a VM has the dummy CPU action in the PM layer. Putting a
1248  * task on the VM does not affect the bound of the dummy CPU action. The bound
1249  * of the dummy CPU action is unlimited.
1250  *
1251  * There are some solutions for this problem. One option is to update the bound
1252  * of the dummy CPU action automatically. It should be the sum of all tasks on
1253  * the VM. But, this solution might be costy, because we have to scan all tasks
1254  * on the VM in share_resource() or we have to trap both the start and end of
1255  * task execution.
1256  *
1257  * The current solution is to use MSG_vm_set_bound(), which allows us to
1258  * directly set the bound of the dummy CPU action.
1259  *
1260  *
1261  * 2.
1262  * Note that bound == 0 means no bound (i.e., unlimited). But, if a host has
1263  * multiple CPU cores, the CPU share of a computation task (or a VM) never
1264  * exceeds the capacity of a CPU core.
1265  */
1266 void MSG_vm_set_bound(msg_vm_t vm, double bound)
1267 {
1268         return simcall_vm_set_bound(vm, bound);
1269 }
1270
1271
1272 /** @brief Set the CPU affinity of a given VM.
1273  *  @ingroup msg_VMs
1274  *
1275  * This function changes the CPU affinity of a given VM. Usage is the same as
1276  * MSG_task_set_affinity(). See the MSG_task_set_affinity() for details.
1277  */
1278 void MSG_vm_set_affinity(msg_vm_t vm, msg_host_t pm, unsigned long mask)
1279 {
1280   msg_host_priv_t priv = msg_host_resource_priv(vm);
1281
1282   if (mask == 0)
1283     xbt_dict_remove_ext(priv->affinity_mask_db, (char *) pm, sizeof(pm));
1284   else
1285     xbt_dict_set_ext(priv->affinity_mask_db, (char *) pm, sizeof(pm), (void *) mask, NULL);
1286
1287   msg_host_t pm_now = MSG_vm_get_pm(vm);
1288   if (pm_now == pm) {
1289     XBT_INFO("set affinity(0x%04lx@%s) for %s", mask, MSG_host_get_name(pm), MSG_host_get_name(vm));
1290     simcall_vm_set_affinity(vm, pm, mask);
1291   } else
1292     XBT_INFO("set affinity(0x%04lx@%s) for %s (not active now)", mask, MSG_host_get_name(pm), MSG_host_get_name(vm));
1293 }