X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/5295fbb473702220cab0389b28d6d4a3097c246d..155a1e0df5db6960042e06036b942b9f93378b05:/src/surf/surf_interface.cpp diff --git a/src/surf/surf_interface.cpp b/src/surf/surf_interface.cpp index 729aa65e20..2e799547f8 100644 --- a/src/surf/surf_interface.cpp +++ b/src/surf/surf_interface.cpp @@ -25,29 +25,35 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(surf_kernel, surf, /* This function is a pimple that we ought to fix. But it won't be easy. * - * The surf_solve() function does properly return the set of actions that changed. - * Instead, each model change a global data, and then the caller of surf_solve must - * pick into these sets of action_failed and action_done. + * The surf_solve() function does properly return the set of actions that + * changed. Instead, each model change a global data, and then the caller of + * surf_solve must pick into these sets of action_failed and action_done. * - * This was not clean but ok as long as we didn't had to restart the processes when the resource comes back up. - * We worked by putting sentinel actions on every resources we are interested in, - * so that surf informs us if/when the corresponding resource fails. + * This was not clean but ok as long as we didn't had to restart the processes + * when the resource comes back up. + * We worked by putting sentinel actions on every resources we are interested + * in, so that surf informs us if/when the corresponding resource fails. * - * But this does not work to get Simix informed of when a resource comes back up, and this is where this pimple comes. - * We have a set of resources that are currently down and for which simix needs to know when it comes back up. - * And the current function is called *at every simulation step* to sweep over that set, searching for a resource - * that was turned back up in the meanwhile. This is UGLY and slow. + * But this does not work to get Simix informed of when a resource comes back + * up, and this is where this pimple comes. We have a set of resources that are + * currently down and for which simix needs to know when it comes back up. + * And the current function is called *at every simulation step* to sweep over + * that set, searching for a resource that was turned back up in the meanwhile. + * This is UGLY and slow. * - * The proper solution would be to not rely on globals for the action_failed and action_done swags. - * They must be passed as parameter by the caller (the handling of these actions in simix may let you - * think that these two sets can be merged, but their handling in SimDag induce the contrary unless this - * simdag code can check by itself whether the action is done of failed -- seems very doable, but yet more - * cleanup to do). + * The proper solution would be to not rely on globals for the action_failed and + * action_done swags. They must be passed as parameter by the caller (the + * handling of these actions in simix may let you think that these two sets can + * be merged, but their handling in SimDag induce the contrary unless this + * simdag code can check by itself whether the action is done of failed -- seems + * very doable, but yet more cleanup to do). * - * Once surf_solve() is passed the set of actions that changed, you want to add a new set of resources back up - * as parameter to this function. You also want to add a boolean field "restart_watched" to each resource, and - * make sure that whenever a resource with this field enabled comes back up, it's added to that set so that Simix - * sees it and react accordingly. This would kill that need for surf to call simix. + * Once surf_solve() is passed the set of actions that changed, you want to add + * a new set of resources back up as parameter to this function. You also want + * to add a boolean field "restart_watched" to each resource, and make sure that + * whenever a resource with this field enabled comes back up, it's added to that + * set so that Simix sees it and react accordingly. This would kill that need + * for surf to call simix. * */ @@ -93,13 +99,14 @@ xbt_dict_t watched_hosts_lib; surf_callback(void, void) surfExitCallbacks; s_surf_model_description_t surf_plugin_description[] = { - {"Energy", - "Cpu energy consumption.", - sg_energy_plugin_init}, - {NULL, NULL, NULL} /* this array must be NULL terminated */ + {"Energy", + "Cpu energy consumption.", + sg_energy_plugin_init}, + {NULL, NULL, NULL} /* this array must be NULL terminated */ }; -/* Don't forget to update the option description in smx_config when you change this */ +/* Don't forget to update the option description in smx_config when you change + this */ s_surf_model_description_t surf_network_model_description[] = { {"LV08", "Realistic network analytic model (slow-start modeled by multiplying latency by 10.4, bandwidth by .92; bottleneck sharing uses a payload of S=8775 for evaluating RTT). ", @@ -110,6 +117,9 @@ s_surf_model_description_t surf_network_model_description[] = { {"SMPI", "Realistic network model specifically tailored for HPC settings (accurate modeling of slow start with correction factors on three intervals: < 1KiB, < 64 KiB, >= 64 KiB)", surf_network_model_init_SMPI}, + {"IB", + "Realistic network model specifically tailored for HPC settings, with Infiniband contention model", + surf_network_model_init_IB}, {"CM02", "Legacy network analytic model (Very similar to LV08, but without corrective factors. The timings of small messages are thus poorly modeled).", surf_network_model_init_CM02}, @@ -157,7 +167,7 @@ s_surf_model_description_t surf_workstation_model_description[] = { s_surf_model_description_t surf_vm_workstation_model_description[] = { {"default", "Default vm workstation model.", - surf_vm_workstation_model_init_current_default}, + surf_vm_workstation_model_init_HL13}, {NULL, NULL, NULL} /* this array must be NULL terminated */ }; @@ -260,7 +270,7 @@ static const char *disk_drives_letter_table[MAX_DRIVE] = { "Y:\\", "Z:\\" }; -#endif +#endif /* * Returns the initial path. On Windows the initial path is @@ -334,7 +344,9 @@ int find_model_description(s_surf_model_description_t * table, if (!strcmp(name, table[i].name)) { return i; } - name_list = strdup(table[0].name); + if (!table[0].name) + xbt_die("No model is valid! This is a bug."); + name_list = xbt_strdup(table[0].name); for (i = 1; table[i].name; i++) { name_list = (char *) xbt_realloc(name_list, strlen(name_list) + strlen(table[i].name) + 3); strcat(name_list, ", "); @@ -370,6 +382,11 @@ static XBT_INLINE void surf_workstation_free(void *r) delete static_cast(r); } +static XBT_INLINE void surf_storage_free(void *r) +{ + delete static_cast(r); +} + void sg_version(int *ver_major,int *ver_minor,int *ver_patch) { *ver_major = SIMGRID_VERSION_MAJOR; @@ -385,6 +402,7 @@ void surf_init(int *argc, char **argv) as_router_lib = xbt_lib_new(); storage_lib = xbt_lib_new(); storage_type_lib = xbt_lib_new(); + file_lib = xbt_lib_new(); watched_hosts_lib = xbt_dict_new_homogeneous(NULL); XBT_DEBUG("Add routing levels"); @@ -396,6 +414,7 @@ void surf_init(int *argc, char **argv) SURF_CPU_LEVEL = xbt_lib_add_level(host_lib,surf_cpu_free); SURF_WKS_LEVEL = xbt_lib_add_level(host_lib,surf_workstation_free); SURF_LINK_LEVEL = xbt_lib_add_level(link_lib,surf_link_free); + SURF_STORAGE_LEVEL = xbt_lib_add_level(storage_lib,surf_storage_free); xbt_init(argc, argv); if (!model_list) @@ -436,7 +455,7 @@ void surf_exit(void) xbt_lib_free(&as_router_lib); xbt_lib_free(&storage_lib); xbt_lib_free(&storage_type_lib); - + xbt_lib_free(&file_lib); xbt_dict_free(&watched_hosts_lib); xbt_dynar_foreach(model_list, iter, model) @@ -500,18 +519,18 @@ double Model::shareResources(double now) { //FIXME: set the good function once and for all if (p_updateMechanism == UM_LAZY) - return shareResourcesLazy(now); + return shareResourcesLazy(now); else if (p_updateMechanism == UM_FULL) - return shareResourcesFull(now); + return shareResourcesFull(now); else - xbt_die("Invalid cpu update mechanism!"); + xbt_die("Invalid cpu update mechanism!"); } double Model::shareResourcesLazy(double now) { ActionPtr action = NULL; double min = -1; - double value; + double share; XBT_DEBUG ("Before share resources, the size of modified actions set is %zd", @@ -524,29 +543,30 @@ double Model::shareResourcesLazy(double now) p_modifiedSet->size()); while(!p_modifiedSet->empty()) { - action = &(p_modifiedSet->front()); - p_modifiedSet->pop_front(); + action = &(p_modifiedSet->front()); + p_modifiedSet->pop_front(); int max_dur_flag = 0; if (action->getStateSet() != p_runningActionSet) continue; /* bogus priority, skip it */ - if (action->getPriority() <= 0) + if (action->getPriority() <= 0 || action->getHat()==LATENCY) continue; action->updateRemainingLazy(now); min = -1; - value = lmm_variable_getvalue(action->getVariable()); - if (value > 0) { + share = lmm_variable_getvalue(action->getVariable()); + + if (share > 0) { + double time_to_completion; if (action->getRemains() > 0) { - value = action->getRemainsNoUpdate() / value; - min = now + value; + time_to_completion = action->getRemainsNoUpdate() / share; } else { - value = 0.0; - min = now; + time_to_completion = 0.0; } + min = now + time_to_completion; // when the task will complete if nothing changes } if ((action->getMaxDuration() != NO_MAX_DURATION) @@ -554,18 +574,20 @@ double Model::shareResourcesLazy(double now) || action->getStartTime() + action->getMaxDuration() < min)) { min = action->getStartTime() + - action->getMaxDuration(); + action->getMaxDuration(); // when the task will complete anyway because of the deadline if any max_dur_flag = 1; } - XBT_DEBUG("Action(%p) Start %lf Finish %lf Max_duration %lf", action, - action->getStartTime(), now + value, + + XBT_DEBUG("Action(%p) corresponds to variable %d", action, action->getVariable()->id_int); + + XBT_DEBUG("Action(%p) Start %f. May finish at %f (got a share of %f). Max_duration %f", action, + action->getStartTime(), min, share, action->getMaxDuration()); if (min != -1) { - action->heapRemove(p_actionHeap); - action->heapInsert(p_actionHeap, min, max_dur_flag ? MAX_DURATION : NORMAL); - XBT_DEBUG("Insert at heap action(%p) min %lf now %lf", action, min, + action->heapUpdate(p_actionHeap, min, max_dur_flag ? MAX_DURATION : NORMAL); + XBT_DEBUG("Insert at heap action(%p) min %f now %f", action, min, now); } else DIE_IMPOSSIBLE; } @@ -576,7 +598,7 @@ double Model::shareResourcesLazy(double now) else min = -1; - XBT_DEBUG("The minimum with the HEAP %lf", min); + XBT_DEBUG("The minimum with the HEAP %f", min); return min; } @@ -597,7 +619,7 @@ double Model::shareResourcesMaxMin(ActionListPtr running_actions, ActionList::iterator it(running_actions->begin()), itend(running_actions->end()); for(; it != itend ; ++it) { - action = &*it; + action = &*it; value = lmm_variable_getvalue(action->getVariable()); if ((value > 0) || (action->getMaxDuration() >= 0)) break; @@ -652,10 +674,12 @@ void Model::updateActionsState(double now, double delta) void Model::updateActionsStateLazy(double /*now*/, double /*delta*/) { + THROW_UNIMPLEMENTED; } void Model::updateActionsStateFull(double /*now*/, double /*delta*/) { + THROW_UNIMPLEMENTED; } /************ @@ -725,7 +749,7 @@ const char *Resource::getName() { xbt_dict_t Resource::getProperties() { if (p_properties==NULL) - p_properties = xbt_dict_new(); + p_properties = xbt_dict_new(); return p_properties; } @@ -746,26 +770,29 @@ const char *surf_action_state_names[6] = { "SURF_ACTION_NOT_IN_THE_SYSTEM" }; -Action::Action() -: m_refcount(1) -{} +void Action::initialize(ModelPtr model, double cost, bool failed, + lmm_variable_t var) +{ + m_priority = 1.0; + m_refcount = 1; + m_remains = cost; + m_maxDuration = NO_MAX_DURATION; + m_finish = -1.0; + m_failed = failed; + m_start = surf_get_clock(); + m_cost = cost; + p_model = model; + p_data = NULL; + p_variable = var; + m_lastValue = 0; + m_lastUpdate = 0; + m_suspended = false; + m_hat = NOTSET; +} Action::Action(ModelPtr model, double cost, bool failed) - : m_priority(1.0) - , m_refcount(1) - , m_remains(cost) - , m_maxDuration(NO_MAX_DURATION) - , m_finish(-1.0) - , m_failed(failed) - , m_start(surf_get_clock()) - , m_cost(cost) - , p_model(model) - , p_data(NULL) - , p_variable(NULL) - , m_lastValue(0) - , m_lastUpdate(0) - , m_suspended(false) { + initialize(model, cost, failed); #ifdef HAVE_TRACING p_category = NULL; #endif @@ -780,21 +807,8 @@ Action::Action(ModelPtr model, double cost, bool failed) } Action::Action(ModelPtr model, double cost, bool failed, lmm_variable_t var) - : m_priority(1.0) - , m_refcount(1) - , m_remains(cost) - , m_maxDuration(NO_MAX_DURATION) - , m_finish(-1.0) - , m_failed(failed) - , m_start(surf_get_clock()) - , m_cost(cost) - , p_model(model) - , p_data(NULL) - , p_variable(var) - , m_lastValue(0) - , m_lastUpdate(0) - , m_suspended(false) { + initialize(model, cost, failed, var); #ifdef HAVE_TRACING p_category = NULL; #endif @@ -848,7 +862,7 @@ void Action::setState(e_surf_action_state_t state) p_stateSet = NULL; if (p_stateSet) - p_stateSet->push_back(*this); + p_stateSet->push_back(*this); XBT_OUT(); } @@ -861,10 +875,10 @@ void Action::setBound(double bound) { XBT_IN("(%p,%g)", this, bound); if (p_variable) - lmm_update_variable_bound(getModel()->getMaxminSystem(), getVariable(), bound); + lmm_update_variable_bound(getModel()->getMaxminSystem(), p_variable, bound); - if (getModel()->getUpdateMechanism() == UM_LAZY) - heapRemove(getModel()->getActionHeap()); + if (getModel()->getUpdateMechanism() == UM_LAZY && getLastUpdate()!=surf_get_clock()) + heapRemove(getModel()->getActionHeap()); XBT_OUT(); } @@ -922,8 +936,8 @@ void Action::setPriority(double priority) void Action::cancel(){ setState(SURF_ACTION_FAILED); if (getModel()->getUpdateMechanism() == UM_LAZY) { - if (actionLmmHook::is_linked()) - getModel()->getModifiedSet()->erase(getModel()->getModifiedSet()->iterator_to(*this)); + if (actionLmmHook::is_linked()) + getModel()->getModifiedSet()->erase(getModel()->getModifiedSet()->iterator_to(*this)); heapRemove(getModel()->getActionHeap()); } } @@ -931,18 +945,18 @@ void Action::cancel(){ int Action::unref(){ m_refcount--; if (!m_refcount) { - if (actionHook::is_linked()) - p_stateSet->erase(p_stateSet->iterator_to(*this)); - if (getVariable()) - lmm_variable_free(getModel()->getMaxminSystem(), getVariable()); - if (getModel()->getUpdateMechanism() == UM_LAZY) { - /* remove from heap */ - heapRemove(getModel()->getActionHeap()); + if (actionHook::is_linked()) + p_stateSet->erase(p_stateSet->iterator_to(*this)); + if (getVariable()) + lmm_variable_free(getModel()->getMaxminSystem(), getVariable()); + if (getModel()->getUpdateMechanism() == UM_LAZY) { + /* remove from heap */ + heapRemove(getModel()->getActionHeap()); if (actionLmmHook::is_linked()) - getModel()->getModifiedSet()->erase(getModel()->getModifiedSet()->iterator_to(*this)); + getModel()->getModifiedSet()->erase(getModel()->getModifiedSet()->iterator_to(*this)); } - delete this; - return 1; + delete this; + return 1; } return 0; } @@ -996,6 +1010,16 @@ void Action::heapRemove(xbt_heap_t heap) } } +void Action::heapUpdate(xbt_heap_t heap, double key, enum heap_action_type hat) +{ + m_hat = hat; + if (m_indexHeap >= 0) { + xbt_heap_update(heap, m_indexHeap, key); + }else{ + xbt_heap_push(heap, this, key); + } +} + /* added to manage the communication action's heap */ void surf_action_lmm_update_index_heap(void *action, int i) { ((ActionPtr)action)->updateIndexHeap(i); @@ -1025,7 +1049,7 @@ void Action::updateRemainingLazy(double now) { double delta = 0.0; - if(getModel() == static_cast(surf_network_model)) + if(getModel() == surf_network_model) { if (m_suspended != 0) return; @@ -1044,10 +1068,10 @@ void Action::updateRemainingLazy(double now) if (m_remains > 0) { XBT_DEBUG("Updating action(%p): remains was %f, last_update was: %f", this, m_remains, m_lastUpdate); - double_update(&m_remains, m_lastValue * delta); + double_update(&m_remains, m_lastValue * delta, sg_surf_precision*sg_maxmin_precision); #ifdef HAVE_TRACING - if (getModel() == static_cast(surf_cpu_model_pm) && TRACE_is_enabled()) { + if (getModel() == surf_cpu_model_pm && TRACE_is_enabled()) { ResourcePtr cpu = static_cast(lmm_constraint_id(lmm_get_cnst_from_var(getModel()->getMaxminSystem(), getVariable(), 0))); TRACE_surf_host_set_utilization(cpu->getName(), getCategory(), m_lastValue, m_lastUpdate, now - m_lastUpdate); } @@ -1055,10 +1079,10 @@ void Action::updateRemainingLazy(double now) XBT_DEBUG("Updating action(%p): remains is now %f", this, m_remains); } - if(getModel() == static_cast(surf_network_model)) + if(getModel() == surf_network_model) { if (m_maxDuration != NO_MAX_DURATION) - double_update(&m_maxDuration, delta); + double_update(&m_maxDuration, delta, sg_surf_precision); //FIXME: duplicated code if ((m_remains <= 0) &&