1 /* Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "surf_private.h"
8 #include "xbt/module.h"
10 #include "simix/smx_host_private.h"
11 #include "surf/surf_resource.h"
12 #include "xbt/xbt_os_thread.h"
13 #include "simgrid/sg_config.h"
/* Declare the root surf log category, then declare surf_kernel as a
 * subcategory of surf and as the default category of this file. */
17 XBT_LOG_NEW_CATEGORY(surf, "All SURF categories");
18 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(surf_kernel, surf,
19 "Logging specific to SURF (kernel)");
21 /* Additional declarations for Windows portability. */
/* Windows-only lookup table with MAX_DRIVE entries (the initializer entries
 * are not visible in this excerpt). __surf_get_initial_path() scans it and
 * returns the entry whose first character equals the upper-cased first
 * character of the current directory. */
29 static const char *disk_drives_letter_table[MAX_DRIVE] = {
57 #endif /* #ifdef _XBT_WIN32 */
60 * Returns the initial path. On Windows, the initial path is
61 * the current directory of the current process; on other
62 * platforms the function returns "./", which represents the
63 * current directory on Unix/Linux.
/* Visible part of the Windows code path: read the current directory of the
 * process, keep its first three characters and return the matching entry of
 * disk_drives_letter_table.
 * NOTE(review): the preprocessor guards, the declaration of root and the
 * fallback return values are not visible in this excerpt. */
66 const char *__surf_get_initial_path(void)
71 char current_directory[MAX_PATH + 1] = { 0 };
72 unsigned int len = GetCurrentDirectory(MAX_PATH + 1, current_directory);
/* Only the three leading characters of the directory are copied into root
 * (presumably drive letter, colon and separator -- TODO confirm). */
78 strncpy(root, current_directory, 3);
/* The first character of root is upper-cased before being compared with the
 * first character of each table entry.
 * NOTE(review): if root is a plain char array, toupper() should receive the
 * value cast to unsigned char -- confirm the declaration of root. */
80 for (i = 0; i < MAX_DRIVE; i++) {
81 if (toupper(root[0]) == disk_drives_letter_table[i][0])
82 return disk_drives_letter_table[i];
91 /* __surf_is_absolute_file_path() returns 1 if
92 * file_path is an absolute file path; otherwise
93 * the function returns 0.
/* Two variants are visible: one probes the path with FindFirstFile(), the
 * other only tests whether the path starts with a slash.
 * NOTE(review): the preprocessor guards that select between them, and the
 * statements following the handle test, are not visible in this excerpt. */
95 int __surf_is_absolute_file_path(const char *file_path)
98 WIN32_FIND_DATA wfd = { 0 };
99 HANDLE hFile = FindFirstFile(file_path, &wfd);
/* The handle returned by FindFirstFile() is compared with INVALID_HANDLE_VALUE. */
101 if (INVALID_HANDLE_VALUE == hFile)
/* Non-Windows test: evaluates to 1 when the first character is a slash, 0 otherwise. */
107 return (file_path[0] == '/');
113 /* model_list_invoke contains only surf_workstation and surf_vm_workstation.
114 * The callback functions of cpu_model and network_model will be called from
115 * those of these workstation models. */
116 xbt_dynar_t model_list = NULL; /* for destroying all models correctly */
117 xbt_dynar_t model_list_invoke = NULL; /* for invoking callbacks */
/* Trace-event history: created by tmgr_history_new() in surf_init() and polled
 * for upcoming event dates by surf_presolve() and surf_solve(). */
118 tmgr_history_t history = NULL;
/* Global lmm system; released with lmm_system_free() and reset to NULL by the
 * teardown code of this file. */
119 lmm_system_t maxmin_system = NULL;
/* Dynar of directory names that surf_fopen() prepends, in order, to relative
 * file names. */
120 xbt_dynar_t surf_path = NULL;
122 /* Don't forget to update the option description in smx_config when you change this */
/* Table of the selectable network models. Judging from the terminator and
 * from the complete ptask_L07 entry of the workstation table, each entry is
 * {name, description, init function}. model_help() and
 * find_model_description() walk such tables until they meet a NULL name.
 * NOTE(review): the name field of every entry below is not visible in this
 * excerpt.
 * NOTE(review): the Reno, Reno2 and Vegas entries carry the same description
 * text although they bind different init functions. */
123 s_surf_model_description_t surf_network_model_description[] = {
125 "Realistic network analytic model (slow-start modeled by multiplying latency by 10.4, bandwidth by .92; bottleneck sharing uses a payload of S=8775 for evaluating RTT). ",
126 surf_network_model_init_LegrandVelho},
128 "Simplistic network model where all communication take a constant time (one second). This model provides the lowest realism, but is (marginally) faster.",
129 surf_network_model_init_Constant},
131 "Realistic network model specifically tailored for HPC settings (accurate modeling of slow start with correction factors on three intervals: < 1KiB, < 64 KiB, >= 64 KiB)",
132 surf_network_model_init_SMPI},
134 "Legacy network analytic model (Very similar to LV08, but without corrective factors. The timings of small messages are thus poorly modeled).",
135 surf_network_model_init_CM02},
138 "Network pseudo-model using the GTNets simulator instead of an analytic model",
139 surf_network_model_init_GTNETS},
143 "Network pseudo-model using the NS3 tcp model instead of an analytic model",
144 surf_network_model_init_NS3},
147 "Model from Steven H. Low using lagrange_solve instead of lmm_solve (experts only; check the code for more info).",
148 surf_network_model_init_Reno},
150 "Model from Steven H. Low using lagrange_solve instead of lmm_solve (experts only; check the code for more info).",
151 surf_network_model_init_Reno2},
153 "Model from Steven H. Low using lagrange_solve instead of lmm_solve (experts only; check the code for more info).",
154 surf_network_model_init_Vegas},
155 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Table of the selectable CPU models; a single entry, bound to
 * surf_cpu_model_init_Cas01, is visible (its name field is not shown in this
 * excerpt). The table ends with an all-NULL entry. */
158 s_surf_model_description_t surf_cpu_model_description[] = {
160 "Simplistic CPU model (time=size/power).",
161 surf_cpu_model_init_Cas01},
162 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Table of the selectable workstation models, as {name, description, init
 * function} entries ending with an all-NULL entry.
 * NOTE(review): the name fields of the first two entries are not visible in
 * this excerpt; only ptask_L07 is shown in full. */
165 s_surf_model_description_t surf_workstation_model_description[] = {
167 "Default workstation model. Currently, CPU:Cas01 and network:LV08 (with cross traffic enabled)",
168 surf_workstation_model_init_current_default},
170 "Workstation model that is automatically chosen if you change the network and CPU models",
171 surf_workstation_model_init_compound},
172 {"ptask_L07", "Workstation model somehow similar to Cas01+CM02 but allowing parallel tasks",
173 surf_workstation_model_init_ptask_L07},
174 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Table describing the optimization modes; three description strings are
 * visible, followed by the all-NULL terminator.
 * NOTE(review): the name and third field of each entry are not visible in
 * this excerpt. */
177 s_surf_model_description_t surf_optimization_mode_description[] = {
179 "Lazy action management (partial invalidation in lmm + heap in action remaining).",
182 "Trace integration. Highly optimized mode when using availability traces (only available for the Cas01 CPU model for now).",
185 "Full update of remaining and variables. Slow but may be useful when debugging.",
187 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Table of the selectable storage models; a single entry, bound to
 * surf_storage_model_init_default, is visible (its name field is not shown in
 * this excerpt). The table ends with an all-NULL entry. */
190 s_surf_model_description_t surf_storage_model_description[] = {
192 "Simplistic storage model.",
193 surf_storage_model_init_default},
194 {NULL, NULL, NULL} /* this array must be NULL terminated */
197 /* ********************************************************************* */
198 /* TUTORIAL: New model */
/* Description table of the tutorial model; the visible entry is bound to
 * surf_new_model_init_default and the table ends with an all-NULL entry.
 * NOTE(review): the name and description fields of the entry are not visible
 * in this excerpt. */
199 s_surf_model_description_t surf_new_model_description[] = {
202 surf_new_model_init_default},
203 {NULL, NULL, NULL} /* this array must be NULL terminated */
205 /* ********************************************************************* */
/* State shared between surf_solve(), surf_set_nthreads() and the two
 * per-model workers declared at the end of this group. */
207 #ifdef CONTEXT_THREADS
208 static xbt_parmap_t surf_parmap = NULL; /* parallel map on models */
211 static int surf_nthreads = 1; /* number of threads of the parmap (1 means no parallelism) */
212 static double *surf_mins = NULL; /* return value of share_resources for each model */
/* surf_share_resources() increments this index with __sync_fetch_and_add(),
 * so each call writes to its own slot of surf_mins. */
213 static int surf_min_index; /* current index in surf_mins */
214 static double min; /* duration determined by surf_solve */
/* Per-model workers: surf_solve() either calls them directly in a loop or
 * hands them to xbt_parmap_apply(). */
216 static void surf_share_resources(surf_model_t model);
217 static void surf_update_actions_state(surf_model_t model);
219 /** \brief Prints the name and long description of every entry of a model table.
 *
 * \param category label inserted into the heading line (the line passing it
 *        to printf is not visible in this excerpt)
 * \param table model descriptions, terminated by an entry whose name is NULL
 *
 * NOTE(review): the previous summary said that this function also quits, but
 * no exit call is visible in this excerpt -- confirm.
 */
220 void model_help(const char *category, s_surf_model_description_t * table)
223 printf("Long description of the %s models accepted by this simulator:\n",
/* One output line per entry, up to the NULL-named terminator. */
225 for (i = 0; table[i].name; i++)
226 printf(" %s: %s\n", table[i].name, table[i].description);
/** \brief Looks up a model by name in a description table.
 *
 * \param table descriptions to search, terminated by an entry whose name is
 *        NULL
 *
 * The body compares a string called name with every entry using strcmp().
 * When nothing matches, it builds a comma-separated list of all the names of
 * the table and aborts through xbt_die().
 *
 * NOTE(review): the declaration of name, the statements executed on a match
 * and the final return are not visible in this excerpt.
 * NOTE(review): the failure path duplicates table[0].name unconditionally, so
 * it assumes a table with at least one real entry.
 */
229 int find_model_description(s_surf_model_description_t * table,
233 char *name_list = NULL;
/* Exact comparison of name with each entry name. */
235 for (i = 0; table[i].name; i++)
236 if (!strcmp(name, table[i].name)) {
/* No match: start the list of valid names with the first entry ... */
239 name_list = strdup(table[0].name);
/* ... then append every other name, preceded by a comma and a space. */
240 for (i = 1; table[i].name; i++) {
/* New size: current text + next name + 3 (two separator characters and the
 * terminating NUL). */
242 xbt_realloc(name_list,
243 strlen(name_list) + strlen(table[i].name) + 3);
244 strcat(name_list, ", ");
245 strcat(name_list, table[i].name);
247 xbt_die("Model '%s' is invalid! Valid models are: %s.", name, name_list);
/** \brief Scans a set of running actions and keeps in min the smallest of
 * their completion estimates (remains divided by the value of their lmm
 * variable) and of their max_duration values.
 *
 * \param running_actions swag of surf actions to scan
 * \param solve callback taking the lmm system
 *
 * NOTE(review): only part of this function is visible in this excerpt. The
 * remaining parameters (offset is used by the VARIABLE macro), the place
 * where solve is invoked, several branches and the return statement are not
 * shown; presumably min is returned -- confirm.
 */
251 double generic_maxmin_share_resources(xbt_swag_t running_actions,
254 void (*solve) (lmm_system_t))
256 surf_action_t action = NULL;
/* Reads the lmm_variable_t stored offset bytes after the start of an action. */
259 #define VARIABLE(action) (*((lmm_variable_t*)(((char *) (action)) + (offset))))
/* First pass: look for an action whose variable value is positive or whose
 * max_duration is non-negative. */
263 xbt_swag_foreach(action, running_actions) {
264 value = lmm_variable_getvalue(VARIABLE(action));
265 if ((value > 0) || (action->max_duration >= 0))
/* Seed min from that action: remains / value, lowered to max_duration when
 * the latter is set and smaller. The enclosing conditions are not visible. */
273 if (action->remains > 0)
274 min = action->remains / value;
277 if ((action->max_duration >= 0) && (action->max_duration < min))
278 min = action->max_duration;
280 min = action->max_duration;
/* Second pass: resume from the action following the one found above. */
283 for (action = xbt_swag_getNext(action, running_actions->offset);
285 action = xbt_swag_getNext(action, running_actions->offset)) {
286 value = lmm_variable_getvalue(VARIABLE(action));
/* Here value is turned from a variable value into a completion estimate. */
288 if (action->remains > 0)
289 value = action->remains / value;
294 XBT_DEBUG("Updating min (value) with %p: %f", action, min);
/* A max_duration smaller than the current min replaces it. */
297 if ((action->max_duration >= 0) && (action->max_duration < min)) {
298 min = action->max_duration;
299 XBT_DEBUG("Updating min (duration) with %p: %f", action, min);
302 XBT_DEBUG("min value : %f", min);
/** \brief Lazy variant of share_resources: solves the lmm system of the
 * model, then re-evaluates only the actions found in its modified set and
 * re-inserts them into the action heap of the model.
 *
 * \param now date used to convert between dates and delays (now + value,
 *        heap key - now)
 * \param model model whose model_private fields modified_set, maxmin_system
 *        and action_heap are used
 *
 * NOTE(review): several lines (local declarations, the statements guarded by
 * the two skip tests, parts of the max_duration handling and the return) are
 * not visible in this excerpt.
 */
308 double generic_share_resources_lazy(double now, surf_model_t model)
310 surf_action_lmm_t action = NULL;
315 ("Before share resources, the size of modified actions set is %d",
316 xbt_swag_size(model->model_private->modified_set));
318 lmm_solve(model->model_private->maxmin_system);
321 ("After share resources, The size of modified actions set is %d",
322 xbt_swag_size(model->model_private->modified_set));
/* Process the actions extracted from the modified set one by one until
 * xbt_swag_extract() returns NULL. */
324 while((action = xbt_swag_extract(model->model_private->modified_set))) {
325 int max_dur_flag = 0;
/* Test whether the action belongs to the running set of the model. */
327 if (action->generic_action.state_set !=
328 model->states.running_action_set)
331 /* bogus priority, skip it */
332 if (action->generic_action.priority <= 0)
335 generic_update_action_remaining_lazy(action,now);
338 value = lmm_variable_getvalue(action->variable);
/* With work remaining, value becomes remains / value. */
340 if (action->generic_action.remains > 0) {
341 value = action->generic_action.remains / value;
/* When a max_duration is set, start + max_duration may replace min. */
349 if ((action->generic_action.max_duration != NO_MAX_DURATION)
351 || action->generic_action.start +
352 action->generic_action.max_duration < min)) {
353 min = action->generic_action.start +
354 action->generic_action.max_duration;
358 XBT_DEBUG("Action(%p) Start %lf Finish %lf Max_duration %lf", action,
359 action->generic_action.start, now + value,
360 action->generic_action.max_duration);
/* Re-key the action in the heap: remove it, then insert it with key min,
 * tagged MAX_DURATION or NORMAL depending on max_dur_flag. */
363 surf_action_lmm_heap_remove(model->model_private->action_heap,action);
364 surf_action_lmm_heap_insert(model->model_private->action_heap,action, min, max_dur_flag ? MAX_DURATION : NORMAL);
365 XBT_DEBUG("Insert at heap action(%p) min %lf now %lf", action, min,
367 } else DIE_IMPOSSIBLE;
370 //hereafter must have already the min value for this resource model
/* With a non-empty heap, the key returned by xbt_heap_maxkey() is converted
 * into a delay relative to now. */
371 if (xbt_heap_size(model->model_private->action_heap) > 0)
372 min = xbt_heap_maxkey(model->model_private->action_heap) - now;
376 XBT_DEBUG("The minimum with the HEAP %lf", min);
/* Callback passed by surf_init() to xbt_lib_add_level() for the
 * ROUTING_HOST_LEVEL and ROUTING_ASR_LEVEL levels; p is handled as a
 * sg_routing_edge_t.
 * NOTE(review): the statements that use elm are not visible in this excerpt. */
380 static XBT_INLINE void routing_asr_host_free(void *p)
382 sg_routing_edge_t elm = p;
/* Callback passed by surf_init() to xbt_lib_add_level() for the
 * ROUTING_PROP_ASR_LEVEL level.
 * NOTE(review): the body is not visible in this excerpt. */
387 static XBT_INLINE void routing_asr_prop_free(void *p)
393 void sg_version(int *ver_major,int *ver_minor,int *ver_patch) {
394 *ver_major = SIMGRID_VERSION_MAJOR;
395 *ver_minor = SIMGRID_VERSION_MINOR;
396 *ver_patch = SIMGRID_VERSION_PATCH;
/** \brief Creates the global containers used by SURF and initializes the
 * subsystems it relies on.
 *
 * \param argc forwarded to xbt_init() and sg_config_init()
 * \param argv forwarded to xbt_init() and sg_config_init()
 *
 * NOTE(review): some lines of this function (conditions and preprocessor
 * guards around a few statements) are not visible in this excerpt.
 */
399 void surf_init(int *argc, char **argv)
401 XBT_DEBUG("Create all Libs");
402 host_lib = xbt_lib_new();
403 link_lib = xbt_lib_new();
404 as_router_lib = xbt_lib_new();
405 storage_lib = xbt_lib_new();
406 storage_type_lib = xbt_lib_new();
/* Unlike the containers above, this one is a dict, not a lib. */
407 watched_hosts_lib = xbt_dict_new();
/* Each xbt_lib_add_level() call registers a callback for one level of a lib
 * and yields the identifier stored in the matching *_LEVEL global. */
409 XBT_DEBUG("Add routing levels");
410 ROUTING_HOST_LEVEL = xbt_lib_add_level(host_lib,routing_asr_host_free);
411 ROUTING_ASR_LEVEL = xbt_lib_add_level(as_router_lib,routing_asr_host_free);
412 ROUTING_PROP_ASR_LEVEL = xbt_lib_add_level(as_router_lib,routing_asr_prop_free);
414 XBT_DEBUG("Add SURF levels");
415 SURF_CPU_LEVEL = xbt_lib_add_level(host_lib,surf_resource_free);
416 SURF_WKS_LEVEL = xbt_lib_add_level(host_lib,surf_resource_free);
417 SURF_LINK_LEVEL = xbt_lib_add_level(link_lib,surf_resource_free);
/* NOTE(review): xbt_init() runs after the XBT_DEBUG calls and container
 * creations above -- confirm that this ordering is intended. */
419 xbt_init(argc, argv);
/* NOTE(review): both dynars are created with sizeof(surf_model_private_t) as
 * element size, while the rest of this file iterates over them with a
 * surf_model_t cursor -- confirm that both types have the same size. */
421 model_list = xbt_dynar_new(sizeof(surf_model_private_t), NULL);
/* model_list_invoke is only created when it does not exist yet. */
422 if (!model_list_invoke)
423 model_list_invoke = xbt_dynar_new(sizeof(surf_model_private_t), NULL);
425 history = tmgr_history_new();
428 TRACE_add_start_function(TRACE_surf_alloc);
429 TRACE_add_end_function(TRACE_surf_release);
432 sg_config_init(argc, argv);
/* Separator inserted by surf_fopen() between a search directory and a file
 * name: a backslash in one configuration, a slash in the other.
 * NOTE(review): the preprocessor conditions selecting between the two
 * definitions are not visible in this excerpt. */
440 # define FILE_DELIM "\\"
442 # define FILE_DELIM "/" /* FIXME: move to better location */
/** \brief Opens a file, searching the directories of surf_path when the name
 * is not absolute.
 *
 * \param name file name; passed unchanged to fopen() when
 *        __surf_is_absolute_file_path() accepts it
 * \param mode mode string forwarded to fopen()
 *
 * NOTE(review): the remaining declarations, the handling of buff and file
 * after each attempt and the final return are not visible in this excerpt.
 */
445 FILE *surf_fopen(const char *name, const char *mode)
448 char *path_elm = NULL;
454 if (__surf_is_absolute_file_path(name)) /* don't mess with absolute file names */
455 return fopen(name, mode);
457 /* search relative files in the path */
458 xbt_dynar_foreach(surf_path, cpt, path_elm) {
/* Candidate path: directory, FILE_DELIM, then the requested name. */
459 buff = bprintf("%s" FILE_DELIM "%s", path_elm, name);
460 file = fopen(buff, mode);
/* Body of the teardown routine (its header line is not visible in this
 * excerpt; presumably surf_exit -- TODO confirm). It finalizes the
 * configuration and every model, then releases the containers and resets
 * the globals listed below.
 * NOTE(review): some guards and preprocessor lines are not visible here. */
472 surf_model_t model = NULL;
474 sg_config_finalize();
/* Every model of model_list gets its finalize callback called before the
 * two model lists are freed. */
476 xbt_dynar_foreach(model_list, iter, model)
477 model->model_private->finalize(model);
478 xbt_dynar_free(&model_list);
480 xbt_dynar_free(&model_list_invoke);
485 lmm_system_free(maxmin_system);
486 maxmin_system = NULL;
489 tmgr_history_free(history);
494 #ifdef CONTEXT_THREADS
495 xbt_parmap_destroy(surf_parmap);
500 xbt_dynar_free(&surf_path);
/* Containers created by surf_init(). */
502 xbt_lib_free(&host_lib);
503 xbt_lib_free(&link_lib);
504 xbt_lib_free(&as_router_lib);
505 xbt_lib_free(&storage_lib);
506 xbt_lib_free(&storage_type_lib);
508 xbt_dict_free(&watched_hosts_lib);
511 surf_parse_lex_destroy();
512 surf_parse_free_callbacks();
514 NOW = 0; /* Just in case the user plans to restart the simulation afterward */
/** \brief Consumes the trace events that are already due and then asks every
 * model to update its action states with a zero-length step.
 *
 * NOTE(review): several lines (the statement guarded by the date test, the
 * middle arguments of the event query and the arguments of
 * update_resource_state) are not visible in this excerpt.
 */
517 void surf_presolve(void)
519 double next_event_date = -1.0;
520 tmgr_trace_event_t event = NULL;
522 surf_resource_t resource = NULL;
523 surf_model_t model = NULL;
527 ("First Run! Let's \"purge\" events and put models in the right state");
/* tmgr_history_next_date() returning -1.0 ends the loop. */
528 while ((next_event_date = tmgr_history_next_date(history)) != -1.0) {
/* Events dated after NOW are tested for here. */
529 if (next_event_date > NOW)
/* Fetch the events up to next_event_date and forward each of them to the
 * model owning the affected resource. */
532 tmgr_history_get_next_event_leq(history, next_event_date,
534 (void **) &resource))) {
536 resource->model->model_private->update_resource_state(resource,
543 /* FIXME: see what is check_update_action_state(). if necessary, use model_list_invoke. */
/* Every model of model_list updates its actions at date NOW with a delta of 0.0. */
544 xbt_dynar_foreach(model_list, iter, model)
545 model->model_private->update_actions_state(model, NOW, 0.0);
/** \brief Determines the duration of the next simulation step (kept in the
 * file-level variable min) and lets the models update their actions for it.
 *
 * As far as the visible lines show:
 *  - min starts at -1.0, or at max_date - NOW when max_date differs from both
 *    -1.0 and NOW;
 *  - every model of model_list_invoke reports through surf_share_resources()
 *    (via the parmap when surf_get_nthreads() > 1) and the non-negative
 *    reports stored in surf_mins are compared with min;
 *  - the dates of the upcoming trace events of history are examined, with a
 *    separate code path for the model named network NS3;
 *  - every model of model_list finally runs surf_update_actions_state().
 *
 * \param max_date upper bound for the step, -1.0 when there is none
 *
 * NOTE(review): many lines of this function (loop headers, else branches,
 * assignments, NOW update and the return statement) are not visible in this
 * excerpt.
 */
548 double surf_solve(double max_date)
550 min = -1.0; /* duration */
551 double next_event_date = -1.0;
552 double model_next_action_end = -1.0;
554 surf_resource_t resource = NULL;
555 surf_model_t model = NULL;
556 tmgr_trace_event_t event = NULL;
559 if (max_date != -1.0 && max_date != NOW) {
560 min = max_date - NOW;
563 XBT_DEBUG("Looking for next action end for all models except NS3");
/* NOTE(review): surf_mins is allocated only while it is NULL, with the
 * current length of model_list_invoke -- confirm that this list cannot grow
 * after the first call. */
565 if (surf_mins == NULL) {
566 surf_mins = xbt_new(double, xbt_dynar_length(model_list_invoke));
571 if (surf_get_nthreads() > 1) {
572 /* parallel version */
573 #ifdef CONTEXT_THREADS
574 xbt_parmap_apply(surf_parmap, (void_f_pvoid_t) surf_share_resources, model_list_invoke);
576 xbt_die("Asked to run in parallel, but no thread at hand...");
580 /* sequential version */
581 xbt_dynar_foreach(model_list_invoke, iter, model) {
582 surf_share_resources(model);
/* Compare each non-negative report with min; a report qualifies when min is
 * still negative or when the report is smaller than min. */
587 for (i = 0; i < xbt_dynar_length(model_list_invoke); i++) {
588 if ((min < 0.0 || surf_mins[i] < min)
589 && surf_mins[i] >= 0.0) {
594 XBT_DEBUG("Min for resources (remember that NS3 don't update that value) : %f", min);
596 XBT_DEBUG("Looking for next trace event");
599 XBT_DEBUG("Next TRACE event : %f", next_event_date);
601 next_event_date = tmgr_history_next_date(history);
/* Separate path for the model named network NS3: it is run with the delay
 * found so far and may replace min with its own answer. */
603 if(surf_network_model->name && !strcmp(surf_network_model->name,"network NS3")){
604 if(next_event_date!=-1.0 && min!=-1.0) {
605 min = MIN(next_event_date - NOW, min);
607 min = MAX(next_event_date - NOW, min);
610 XBT_DEBUG("Run for network at most %f", min);
611 // run until min or next flow
612 model_next_action_end = surf_network_model->model_private->share_resources(surf_network_model, min);
614 XBT_DEBUG("Min for network : %f", model_next_action_end);
615 if(model_next_action_end>=0.0)
616 min = model_next_action_end;
619 if (next_event_date < 0.0) {
620 XBT_DEBUG("no next TRACE event. Stop searching for it");
/* Stop when no duration is known or when the next event lies beyond the
 * end of the step. */
624 if ((min == -1.0) || (next_event_date > NOW + min)) break;
626 XBT_DEBUG("Updating models (min = %g, NOW = %g, next_event_date = %g)",min, NOW, next_event_date);
628 tmgr_history_get_next_event_leq(history, next_event_date,
630 (void **) &resource))) {
/* An event on a resource reported as used shortens the step to the date of
 * that event. */
631 if (resource->model->model_private->resource_used(resource)) {
632 min = next_event_date - NOW;
634 ("This event will modify model state. Next event set to %f",
637 /* update state of model_obj according to new value. Does not touch lmm.
638 It will be modified if needed when updating actions */
639 XBT_DEBUG("Calling update_resource_state for resource %s with min %lf",
640 resource->model->name, min);
641 resource->model->model_private->update_resource_state(resource,
647 /* FIXME: Moved this test to here to avoid stopping simulation if there are actions running on cpus and all cpus are with availability = 0.
648 * This may cause an infinite loop if one cpu has a trace with periodicity = 0 and the other a trace with periodicity > 0.
649 * The options are: all traces with same periodicity(0 or >0) or we need to change the way how the events are managed */
651 XBT_DEBUG("No next event at all. Bail out now.");
655 XBT_DEBUG("Duration set to %f", min);
/* Update phase: unlike the share phase above, it walks model_list and not
 * model_list_invoke (see the FIXME below). */
659 if (surf_get_nthreads() > 1) {
660 /* parallel version */
661 #ifdef CONTEXT_THREADS
662 xbt_parmap_apply(surf_parmap, (void_f_pvoid_t) surf_update_actions_state, model_list);
666 /* FIXME: model_list or model_list_invoke? revisit here later */
667 /* sequential version */
668 xbt_dynar_foreach(model_list, iter, model) {
669 surf_update_actions_state(model);
674 TRACE_paje_dump_buffer (0);
/* NOTE(review): the body is not visible in this excerpt; presumably it
 * returns the current simulated date (NOW) -- confirm. */
680 XBT_INLINE double surf_get_clock(void)
685 static void surf_share_resources(surf_model_t model)
687 double next_action_end = -1.0;
688 int i = __sync_fetch_and_add(&surf_min_index, 1);
689 if (strcmp(model->name,"network NS3")) {
690 XBT_DEBUG("Running for Resource [%s]", model->name);
691 next_action_end = model->model_private->share_resources(model, NOW);
692 XBT_DEBUG("Resource [%s] : next action end = %f",
693 model->name, next_action_end);
695 surf_mins[i] = next_action_end;
698 static void surf_update_actions_state(surf_model_t model)
700 model->model_private->update_actions_state(model, NOW, min);
704 * \brief Returns the number of parallel threads used to update the models.
705 * \return the number of threads (1 means no parallelism)
707 int surf_get_nthreads(void) {
708 return surf_nthreads;
712 * \brief Sets the number of parallel threads used to update the models.
714 * A value of 1 means no parallelism.
716 * \param nthreads the number of threads to use
/* Stores the requested thread count in surf_nthreads and, when threads are
 * compiled in (CONTEXT_THREADS), replaces the parmap used by surf_solve().
 * NOTE(review): the conditions guarding the auto-detection and the creation
 * of the new parmap are not visible in this excerpt. */
718 void surf_set_nthreads(int nthreads) {
/* Auto-detection: the requested value is replaced by the number of cores. */
721 nthreads = xbt_os_get_numcores();
722 XBT_INFO("Auto-setting surf/nthreads to %d",nthreads);
/* The previous parmap is destroyed before any new one is created. */
725 #ifdef CONTEXT_THREADS
726 xbt_parmap_destroy(surf_parmap);
731 #ifdef CONTEXT_THREADS
732 surf_parmap = xbt_parmap_new(nthreads, XBT_PARMAP_DEFAULT);
/* Alternative to the parmap creation: reject the request with an arg_error. */
734 THROWF(arg_error, 0, "Cannot activate parallel threads in Surf: your architecture does not support threads");
738 surf_nthreads = nthreads;
741 /* This function is a pimple that we ought to fix. But it won't be easy.
743 * The surf_solve() function does not properly return the set of actions that changed.
744 * Instead, each model changes some global data, and the caller of surf_solve() must then
745 * pick from these sets of action_failed and action_done.
747 * This was not clean, but OK as long as we didn't have to restart the processes when the resource comes back up.
748 * We worked by putting sentinel actions on every resource we are interested in,
749 * so that surf informs us if/when the corresponding resource fails.
751 * But this does not work to get Simix informed of when a resource comes back up, and this is where this pimple comes in.
752 * We have a set of resources that are currently down and for which simix needs to know when they come back up.
753 * And the current function is called *at every simulation step* to sweep over that set, searching for a resource
754 * that was turned back up in the meantime. This is UGLY and slow.
756 * The proper solution would be to not rely on globals for the action_failed and action_done swags.
757 * They must be passed as parameters by the caller (the handling of these actions in simix may let you
758 * think that these two sets can be merged, but their handling in SimDag suggests the contrary unless this
759 * simdag code can check by itself whether the action is done or failed -- seems very doable, but yet more
762 * Once surf_solve() is passed the set of actions that changed, you want to add a new set of resources back up
763 * as parameter to this function. You also want to add a boolean field "restart_watched" to each resource, and
764 * make sure that whenever a resource with this field enabled comes back up, it's added to that set so that Simix
765 * sees it and reacts accordingly. This would remove the need for surf to call simix.
769 static void remove_watched_host(void *key)
771 xbt_dict_remove(watched_hosts_lib, *(char**)key);
/* Sweeps over watched_hosts_lib: every host found in state SURF_RESOURCE_ON
 * gets its processes restarted and its key removed from the dict afterwards.
 * NOTE(review): the declarations of key and host and the else branch header
 * are not visible in this excerpt. */
774 void surf_watched_hosts(void)
778 xbt_dict_cursor_t cursor;
/* Keys of the hosts to forget; they are removed only after the traversal
 * (presumably to avoid modifying the dict while iterating -- TODO confirm). */
779 xbt_dynar_t hosts = xbt_dynar_new(sizeof(char*), NULL);
781 XBT_DEBUG("Check for host SURF_RESOURCE_ON on watched_hosts_lib");
782 xbt_dict_foreach(watched_hosts_lib,cursor,key,host)
784 if(SIMIX_host_get_state(host) == SURF_RESOURCE_ON){
785 XBT_INFO("Restart processes on host: %s",SIMIX_host_get_name(host));
786 SIMIX_host_autorestart(host);
787 xbt_dynar_push_as(hosts, char*, key);
790 XBT_DEBUG("See SURF_RESOURCE_OFF on host: %s",key);
/* Remove every collected key from the dict, then release the temporary dynar. */
792 xbt_dynar_map(hosts, remove_watched_host);
793 xbt_dynar_free(&hosts);