src/surf/cpu_cas01.c

   1 /* Copyright (c) 2009-2011. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "surf_private.h"
   8 #include "surf/surf_resource.h"
   9 #include "maxmin_private.h"
  10 #include "simgrid/sg_config.h"
  11 #include "surf/cpu_cas01_private.h"
  12
  13 /* the model objects for physical machines and virtual machines */
  14 surf_model_t surf_cpu_model_pm = NULL;
  15 surf_model_t surf_cpu_model_vm = NULL;
  16
  17 #undef GENERIC_LMM_ACTION
  18 #undef GENERIC_ACTION
  19 #undef ACTION_GET_CPU
  20 #define GENERIC_LMM_ACTION(action) action->generic_lmm_action
  21 #define GENERIC_ACTION(action) GENERIC_LMM_ACTION(action).generic_action
  22 #define ACTION_GET_CPU(action) ((surf_action_cpu_Cas01_t) action)->cpu
  23
  24 typedef struct surf_action_cpu_cas01 {
  25   s_surf_action_lmm_t generic_lmm_action;
  26 } s_surf_action_cpu_Cas01_t, *surf_action_cpu_Cas01_t;
  27
  28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(surf_cpu, surf,
  29                                 "Logging specific to the SURF CPU IMPROVED module");
  30
  31 static xbt_swag_t
  32     cpu_running_action_set_that_does_not_need_being_checked = NULL;
  33
  34
  35 void *cpu_cas01_create_resource(const char *name, double power_peak,
  36                                  double power_scale,
  37                                  tmgr_trace_t power_trace,
  38                                  int core,
  39                                  e_surf_resource_state_t state_initial,
  40                                  tmgr_trace_t state_trace,
  41                                  xbt_dict_t cpu_properties,
  42                                  surf_model_t cpu_model)
  43 {
  44   cpu_Cas01_t cpu = NULL;
  45
  46   xbt_assert(!surf_cpu_resource_priv(surf_cpu_resource_by_name(name)),
  47              "Host '%s' declared several times in the platform file",
  48              name);
  49   cpu = (cpu_Cas01_t) surf_resource_new(sizeof(s_cpu_Cas01_t),
  50                                         cpu_model, name,
  51                                         cpu_properties);
  52   cpu->power_peak = power_peak;
  53   xbt_assert(cpu->power_peak > 0, "Power has to be >0");
  54   cpu->power_scale = power_scale;
  55   cpu->core = core;
  56   xbt_assert(core > 0, "Invalid number of cores %d", core);
  57
  58   if (power_trace)
  59     cpu->power_event =
  60         tmgr_history_add_trace(history, power_trace, 0.0, 0, cpu);
  61
  62   cpu->state_current = state_initial;
  63   if (state_trace)
  64     cpu->state_event =
  65         tmgr_history_add_trace(history, state_trace, 0.0, 0, cpu);
  66
  67   cpu->constraint =
  68       lmm_constraint_new(cpu_model->model_private->maxmin_system, cpu,
  69                          cpu->core * cpu->power_scale * cpu->power_peak);
  70
  71   /* Note (hypervisor): we create a constraint object for each CPU core, which
  72    * is used for making a contraint problem of CPU affinity.
  73    **/
  74   {
  75     /* At now, we assume that a VM does not have a multicore CPU. */
  76     if (core > 1)
  77       xbt_assert(cpu_model == surf_cpu_model_pm);
  78
  79     cpu->constraint_core = xbt_new(lmm_constraint_t, core);
  80
  81     unsigned long i;
  82     for (i = 0; i < core; i++) {
  83       /* just for a unique id, never used as a string. */
  84       void *cnst_id = bprintf("%s:%lu", name, i);
  85       cpu->constraint_core[i] =
  86         lmm_constraint_new(cpu_model->model_private->maxmin_system, cnst_id,
  87             cpu->power_scale * cpu->power_peak);
  88     }
  89   }
  90
  91   xbt_lib_set(host_lib, name, SURF_CPU_LEVEL, cpu);
  92
  93   return xbt_lib_get_elm_or_null(host_lib, name);;
  94 }
  95
  96
  97 static void parse_cpu_init(sg_platf_host_cbarg_t host)
  98 {
  99   /* This function is called when a platform file is parsed. Physical machines
 100    * are defined there. Thus, we use the cpu model object for the physical
 101    * machine layer. */
 102   cpu_cas01_create_resource(host->id,
 103                       host->power_peak,
 104                       host->power_scale,
 105                       host->power_trace,
 106                       host->core_amount,
 107                       host->initial_state,
 108                       host->state_trace, host->properties,
 109                       surf_cpu_model_pm);
 110 }
 111
 112 static void cpu_add_traces_cpu(void)
 113 {
 114   xbt_dict_cursor_t cursor = NULL;
 115   char *trace_name, *elm;
 116   static int called = 0;
 117   if (called)
 118     return;
 119   called = 1;
 120
 121   /* connect all traces relative to hosts */
 122   xbt_dict_foreach(trace_connect_list_host_avail, cursor, trace_name, elm) {
 123     tmgr_trace_t trace = xbt_dict_get_or_null(traces_set_list, trace_name);
 124     cpu_Cas01_t host = surf_cpu_resource_by_name(elm);
 125
 126     xbt_assert(host, "Host %s undefined", elm);
 127     xbt_assert(trace, "Trace %s undefined", trace_name);
 128
 129     host->state_event =
 130         tmgr_history_add_trace(history, trace, 0.0, 0, host);
 131   }
 132
 133   xbt_dict_foreach(trace_connect_list_power, cursor, trace_name, elm) {
 134     tmgr_trace_t trace = xbt_dict_get_or_null(traces_set_list, trace_name);
 135     cpu_Cas01_t host = surf_cpu_resource_by_name(elm);
 136
 137     xbt_assert(host, "Host %s undefined", elm);
 138     xbt_assert(trace, "Trace %s undefined", trace_name);
 139
 140     host->power_event =
 141         tmgr_history_add_trace(history, trace, 0.0, 0, host);
 142   }
 143 }
 144
 145 static void cpu_define_callbacks_cas01()
 146 {
 147   sg_platf_host_add_cb(parse_cpu_init);
 148   sg_platf_postparse_add_cb(cpu_add_traces_cpu);
 149 }
 150
 151 static int cpu_resource_used(void *resource)
 152 {
 153   surf_model_t cpu_model = ((surf_resource_t) resource)->model;
 154
 155   /* Note (hypervisor): we do not need to look up constraint_core[i] here. Even
 156    * when a task is pinned or not, its variable object is always linked to the
 157    * basic contraint object.
 158    **/
 159
 160   return lmm_constraint_used(cpu_model->model_private->maxmin_system,
 161                              ((cpu_Cas01_t) resource)->constraint);
 162 }
 163
 164 static double cpu_share_resources_lazy(surf_model_t cpu_model, double now)
 165 {
 166   return generic_share_resources_lazy(now, cpu_model);
 167 }
 168
 169 static double cpu_share_resources_full(surf_model_t cpu_model, double now)
 170 {
 171   s_surf_action_cpu_Cas01_t action;
 172   return generic_maxmin_share_resources(cpu_model->states.
 173                                         running_action_set,
 174                                         xbt_swag_offset(action,
 175                                                         generic_lmm_action.
 176                                                         variable),
 177                                         cpu_model->model_private->maxmin_system, lmm_solve);
 178 }
 179
 180 static void cpu_update_actions_state_lazy(surf_model_t cpu_model, double now, double delta)
 181 {
 182   generic_update_actions_state_lazy(now, delta, cpu_model);
 183 }
 184
 185 static void cpu_update_actions_state_full(surf_model_t cpu_model, double now, double delta)
 186 {
 187   generic_update_actions_state_full(now, delta, cpu_model);
 188 }
 189
 190 static void cpu_update_resource_state(void *id,
 191                                       tmgr_trace_event_t event_type,
 192                                       double value, double date)
 193 {
 194   cpu_Cas01_t cpu = id;
 195   lmm_variable_t var = NULL;
 196   lmm_element_t elem = NULL;
 197   surf_model_t cpu_model = ((surf_resource_t) cpu)->model;
 198
 199   surf_watched_hosts();
 200
 201   if (event_type == cpu->power_event) {
 202     /* TODO (Hypervisor): do the same thing for constraint_core[i] */
 203     XBT_CRITICAL("FIXME: add power scaling code also for constraint_core[i]");
 204     xbt_abort();
 205
 206     cpu->power_scale = value;
 207     lmm_update_constraint_bound(cpu_model->model_private->maxmin_system, cpu->constraint,
 208                                 cpu->core * cpu->power_scale *
 209                                 cpu->power_peak);
 210 #ifdef HAVE_TRACING
 211     TRACE_surf_host_set_power(date, cpu->generic_resource.name,
 212                               cpu->core * cpu->power_scale *
 213                               cpu->power_peak);
 214 #endif
 215     while ((var = lmm_get_var_from_cnst
 216             (cpu_model->model_private->maxmin_system, cpu->constraint, &elem))) {
 217       surf_action_cpu_Cas01_t action = lmm_variable_id(var);
 218       lmm_update_variable_bound(cpu_model->model_private->maxmin_system,
 219                                 GENERIC_LMM_ACTION(action).variable,
 220                                 cpu->power_scale * cpu->power_peak);
 221     }
 222     if (tmgr_trace_event_free(event_type))
 223       cpu->power_event = NULL;
 224   } else if (event_type == cpu->state_event) {
 225     /* TODO (Hypervisor): do the same thing for constraint_core[i] */
 226     XBT_CRITICAL("FIXME: add state change code also for constraint_core[i]");
 227     xbt_abort();
 228
 229     if (value > 0)
 230       cpu->state_current = SURF_RESOURCE_ON;
 231     else {
 232       lmm_constraint_t cnst = cpu->constraint;
 233
 234       cpu->state_current = SURF_RESOURCE_OFF;
 235
 236       while ((var = lmm_get_var_from_cnst(cpu_model->model_private->maxmin_system, cnst, &elem))) {
 237         surf_action_t action = lmm_variable_id(var);
 238
 239         if (surf_action_state_get(action) == SURF_ACTION_RUNNING ||
 240             surf_action_state_get(action) == SURF_ACTION_READY ||
 241             surf_action_state_get(action) ==
 242             SURF_ACTION_NOT_IN_THE_SYSTEM) {
 243           action->finish = date;
 244           surf_action_state_set(action, SURF_ACTION_FAILED);
 245         }
 246       }
 247     }
 248     if (tmgr_trace_event_free(event_type))
 249       cpu->state_event = NULL;
 250   } else {
 251     XBT_CRITICAL("Unknown event ! \n");
 252     xbt_abort();
 253   }
 254
 255   return;
 256 }
 257
 258 static void cpu_action_set_affinity(surf_action_t action, void *cpu, unsigned long mask)
 259 {
 260   lmm_variable_t var_obj = ((surf_action_lmm_t) action)->variable;
 261
 262   surf_model_t cpu_model = action->model_obj;
 263   xbt_assert(cpu_model->type == SURF_MODEL_TYPE_CPU);
 264   cpu_Cas01_t CPU = surf_cpu_resource_priv(cpu);
 265
 266   XBT_IN("(%p,%lx)", action, mask);
 267
 268
 269   unsigned long i;
 270   for (i = 0; i < CPU->core; i++) {
 271     XBT_INFO("clear affinity %p to cpu-%lu@%s", action, i, CPU->generic_resource.name);
 272     lmm_shrink(cpu_model->model_private->maxmin_system, CPU->constraint_core[i], var_obj);
 273
 274     unsigned long has_affinity = (1UL << i) & mask;
 275     if (has_affinity) {
 276       XBT_INFO("set affinity %p to cpu-%lu@%s", action, i, CPU->generic_resource.name);
 277       lmm_expand(cpu_model->model_private->maxmin_system, CPU->constraint_core[i], var_obj, 1.0);
 278     }
 279   }
 280
 281   if (cpu_model->model_private->update_mechanism == UM_LAZY) {
 282     XBT_WARN("FIXME (hypervisor): Do we need to do something for the LAZY mode?");
 283   }
 284
 285   XBT_OUT();
 286 }
 287
 288 static surf_action_t cpu_execute(void *cpu, double size)
 289 {
 290   surf_action_cpu_Cas01_t action = NULL;
 291   cpu_Cas01_t CPU = surf_cpu_resource_priv(cpu);
 292   surf_model_t cpu_model = ((surf_resource_t) CPU)->model;
 293
 294   XBT_IN("(%s,%g)", surf_resource_name(CPU), size);
 295   action =
 296       surf_action_new(sizeof(s_surf_action_cpu_Cas01_t), size,
 297                       cpu_model,
 298                       CPU->state_current != SURF_RESOURCE_ON);
 299
 300   GENERIC_LMM_ACTION(action).suspended = 0;     /* Should be useless because of the
 301                                                    calloc but it seems to help valgrind... */
 302
 303   /* Note (hypervisor): here, the bound value of the variable is set to the
 304    * capacity of a CPU core. But, after MSG_{task/vm}_set_bound() were added to
 305    * the hypervisor branch, this bound value is overwritten in
 306    * SIMIX_host_execute().
 307    * TODO: cleanup this.
 308    */
 309   GENERIC_LMM_ACTION(action).variable =
 310       lmm_variable_new(cpu_model->model_private->maxmin_system, action,
 311                        GENERIC_ACTION(action).priority,
 312                        CPU->power_scale * CPU->power_peak, 1 + CPU->core); // the basic constraint plus core-specific constraints
 313   if (cpu_model->model_private->update_mechanism == UM_LAZY) {
 314     GENERIC_LMM_ACTION(action).index_heap = -1;
 315     GENERIC_LMM_ACTION(action).last_update = surf_get_clock();
 316     GENERIC_LMM_ACTION(action).last_value = 0.0;
 317   }
 318   lmm_expand(cpu_model->model_private->maxmin_system, CPU->constraint,
 319              GENERIC_LMM_ACTION(action).variable, 1.0);
 320   XBT_OUT();
 321   return (surf_action_t) action;
 322 }
 323
 324 static surf_action_t cpu_action_sleep(void *cpu, double duration)
 325 {
 326   surf_action_cpu_Cas01_t action = NULL;
 327   cpu_Cas01_t CPU = surf_cpu_resource_priv(cpu);
 328   surf_model_t cpu_model = ((surf_resource_t) CPU)->model;
 329
 330   if (duration > 0)
 331     duration = MAX(duration, MAXMIN_PRECISION);
 332
 333   XBT_IN("(%s,%g)", surf_resource_name(surf_cpu_resource_priv(cpu)), duration);
 334   action = (surf_action_cpu_Cas01_t) cpu_execute(cpu, 1.0);
 335   // FIXME: sleep variables should not consume 1.0 in lmm_expand
 336   GENERIC_ACTION(action).max_duration = duration;
 337   GENERIC_LMM_ACTION(action).suspended = 2;
 338   if (duration == NO_MAX_DURATION) {
 339     /* Move to the *end* of the corresponding action set. This convention
 340        is used to speed up update_resource_state  */
 341     xbt_swag_remove(action, ((surf_action_t) action)->state_set);
 342     ((surf_action_t) action)->state_set =
 343         cpu_running_action_set_that_does_not_need_being_checked;
 344     xbt_swag_insert(action, ((surf_action_t) action)->state_set);
 345   }
 346
 347   lmm_update_variable_weight(cpu_model->model_private->maxmin_system,
 348                              GENERIC_LMM_ACTION(action).variable, 0.0);
 349   if (cpu_model->model_private->update_mechanism == UM_LAZY) {     // remove action from the heap
 350     surf_action_lmm_heap_remove(cpu_model->model_private->action_heap,(surf_action_lmm_t)action);
 351     // this is necessary for a variable with weight 0 since such
 352     // variables are ignored in lmm and we need to set its max_duration
 353     // correctly at the next call to share_resources
 354     xbt_swag_insert_at_head(action, cpu_model->model_private->modified_set);
 355   }
 356
 357   XBT_OUT();
 358   return (surf_action_t) action;
 359 }
 360
 361 static e_surf_resource_state_t cpu_get_state(void *cpu)
 362 {
 363   return ((cpu_Cas01_t)surf_cpu_resource_priv(cpu))->state_current;
 364 }
 365
 366 static void cpu_set_state(void *cpu, e_surf_resource_state_t state)
 367 {
 368   ((cpu_Cas01_t)surf_cpu_resource_priv(cpu))->state_current = state;
 369 }
 370
 371 static double cpu_get_speed(void *cpu, double load)
 372 {
 373   return load * ((cpu_Cas01_t)surf_cpu_resource_priv(cpu))->power_peak;
 374 }
 375
 376 static int cpu_get_core(void *cpu)
 377 {
 378   return ((cpu_Cas01_t)surf_cpu_resource_priv(cpu))->core;
 379 }
 380 static double cpu_get_available_speed(void *cpu)
 381 {
 382   /* number between 0 and 1 */
 383   return ((cpu_Cas01_t)surf_cpu_resource_priv(cpu))->power_scale;
 384 }
 385
 386 static void cpu_finalize(surf_model_t cpu_model)
 387 {
 388   lmm_system_free(cpu_model->model_private->maxmin_system);
 389   cpu_model->model_private->maxmin_system = NULL;
 390
 391   if (cpu_model->model_private->action_heap)
 392     xbt_heap_free(cpu_model->model_private->action_heap);
 393   xbt_swag_free(cpu_model->model_private->modified_set);
 394
 395   surf_model_exit(cpu_model);
 396   cpu_model = NULL;
 397
 398   xbt_swag_free(cpu_running_action_set_that_does_not_need_being_checked);
 399   cpu_running_action_set_that_does_not_need_being_checked = NULL;
 400 }
 401
 402 static surf_model_t surf_cpu_model_init_cas01(void)
 403 {
 404   s_surf_action_t action;
 405   s_surf_action_cpu_Cas01_t comp;
 406
 407   char *optim = xbt_cfg_get_string(_sg_cfg_set, "cpu/optim");
 408   int select =
 409       xbt_cfg_get_boolean(_sg_cfg_set, "cpu/maxmin_selective_update");
 410
 411   surf_model_t cpu_model = surf_model_init();
 412
 413   if (!strcmp(optim, "Full")) {
 414     cpu_model->model_private->update_mechanism = UM_FULL;
 415     cpu_model->model_private->selective_update = select;
 416   } else if (!strcmp(optim, "Lazy")) {
 417     cpu_model->model_private->update_mechanism = UM_LAZY;
 418     cpu_model->model_private->selective_update = 1;
 419     xbt_assert((select == 1)
 420                ||
 421                (xbt_cfg_is_default_value
 422                 (_sg_cfg_set, "cpu/maxmin_selective_update")),
 423                "Disabling selective update while using the lazy update mechanism is dumb!");
 424   } else {
 425     xbt_die("Unsupported optimization (%s) for this model", optim);
 426   }
 427
 428   cpu_running_action_set_that_does_not_need_being_checked =
 429       xbt_swag_new(xbt_swag_offset(action, state_hookup));
 430
 431   cpu_model->name = "cpu";
 432   cpu_model->type = SURF_MODEL_TYPE_CPU;
 433
 434   cpu_model->action_unref = surf_action_unref;
 435   cpu_model->action_cancel = surf_action_cancel;
 436   cpu_model->action_state_set = surf_action_state_set;
 437
 438   cpu_model->model_private->resource_used = cpu_resource_used;
 439
 440   if (cpu_model->model_private->update_mechanism == UM_LAZY) {
 441     cpu_model->model_private->share_resources =
 442         cpu_share_resources_lazy;
 443     cpu_model->model_private->update_actions_state =
 444         cpu_update_actions_state_lazy;
 445   } else if (cpu_model->model_private->update_mechanism == UM_FULL) {
 446     cpu_model->model_private->share_resources =
 447         cpu_share_resources_full;
 448     cpu_model->model_private->update_actions_state =
 449         cpu_update_actions_state_full;
 450   } else
 451     xbt_die("Invalid cpu update mechanism!");
 452
 453   cpu_model->model_private->update_resource_state =
 454       cpu_update_resource_state;
 455   cpu_model->model_private->finalize = cpu_finalize;
 456
 457   cpu_model->suspend = surf_action_suspend;
 458   cpu_model->resume = surf_action_resume;
 459   cpu_model->is_suspended = surf_action_is_suspended;
 460   cpu_model->set_max_duration = surf_action_set_max_duration;
 461   cpu_model->set_priority = surf_action_set_priority;
 462   cpu_model->set_bound = surf_action_set_bound;
 463   cpu_model->set_affinity = cpu_action_set_affinity;
 464 #ifdef HAVE_TRACING
 465   cpu_model->set_category = surf_action_set_category;
 466 #endif
 467   cpu_model->get_remains = surf_action_get_remains;
 468
 469   cpu_model->extension.cpu.execute = cpu_execute;
 470   cpu_model->extension.cpu.sleep = cpu_action_sleep;
 471
 472   cpu_model->extension.cpu.get_state = cpu_get_state;
 473   cpu_model->extension.cpu.set_state = cpu_set_state;
 474   cpu_model->extension.cpu.get_core = cpu_get_core;
 475   cpu_model->extension.cpu.get_speed = cpu_get_speed;
 476   cpu_model->extension.cpu.get_available_speed =
 477       cpu_get_available_speed;
 478   cpu_model->extension.cpu.add_traces = cpu_add_traces_cpu;
 479
 480   if (!cpu_model->model_private->maxmin_system) {
 481     cpu_model->model_private->maxmin_system = lmm_system_new(cpu_model->model_private->selective_update);
 482   }
 483   if (cpu_model->model_private->update_mechanism == UM_LAZY) {
 484     cpu_model->model_private->action_heap = xbt_heap_new(8, NULL);
 485     xbt_heap_set_update_callback(cpu_model->model_private->action_heap,
 486         surf_action_lmm_update_index_heap);
 487     cpu_model->model_private->modified_set =
 488         xbt_swag_new(xbt_swag_offset(comp, generic_lmm_action.action_list_hookup));
 489     cpu_model->model_private->maxmin_system->keep_track = cpu_model->model_private->modified_set;
 490   }
 491
 492   return cpu_model;
 493 }
 494
 495 /*********************************************************************/
 496 /* Basic sharing model for CPU: that is where all this started... ;) */
 497 /*********************************************************************/
 498 /* @InProceedings{casanova01simgrid, */
 499 /*   author =       "H. Casanova", */
 500 /*   booktitle =    "Proceedings of the IEEE Symposium on Cluster Computing */
 501 /*                  and the Grid (CCGrid'01)", */
 502 /*   publisher =    "IEEE Computer Society", */
 503 /*   title =        "Simgrid: {A} Toolkit for the Simulation of Application */
 504 /*                  Scheduling", */
 505 /*   year =         "2001", */
 506 /*   month =        may, */
 507 /*   note =         "Available at */
 508 /*                  \url{http://grail.sdsc.edu/papers/simgrid_ccgrid01.ps.gz}." */
 509 /* } */
 510
 511
 512 void surf_cpu_model_init_Cas01(void)
 513 {
 514   char *optim = xbt_cfg_get_string(_sg_cfg_set, "cpu/optim");
 515
 516   xbt_assert(!surf_cpu_model_pm);
 517   xbt_assert(!surf_cpu_model_vm);
 518
 519   if (strcmp(optim, "TI") == 0) {
 520     /* FIXME: do we have to supprot TI? for VM */
 521     surf_cpu_model_pm = surf_cpu_model_init_ti();
 522     XBT_INFO("TI model is used (it will crashed since this is the hypervisor branch)");
 523   } else {
 524     surf_cpu_model_pm  = surf_cpu_model_init_cas01();
 525     surf_cpu_model_vm  = surf_cpu_model_init_cas01();
 526
 527     /* cpu_model is registered only to model_list, and not to
 528      * model_list_invoke. The shared_resource callback function will be called
 529      * from that of the workstation model. */
 530     xbt_dynar_push(model_list, &surf_cpu_model_pm);
 531     xbt_dynar_push(model_list, &surf_cpu_model_vm);
 532
 533     cpu_define_callbacks_cas01();
 534   }
 535 }
 536
 537 /* TODO: do we address nested virtualization later? */
 538 #if 0
 539 surf_model_t cpu_model_cas01(int level){
 540         // TODO this table should be allocated
 541         if(!surf_cpu_model[level])
 542          // allocate it
 543         return surf_cpu_model[level];
 544 }
 545 #endif