-
- smx_process_t process = NULL;
- int i;
- double elapsed_time = 0.0;
- static int state_modifications = 1;
- static int first = 1;
-
- xbt_context_empty_trash();
- if(xbt_swag_size(simix_global->process_to_run) && (elapsed_time>0)) {
- DEBUG0("**************************************************");
- }
- if (first) {
- surf_solve();/* Takes traces into account. Returns 0.0 */
- first=0;
- }
- while ((process = xbt_swag_extract(simix_global->process_to_run))) {
- DEBUG2("Scheduling %s on %s",
- process->name,
- process->simdata->s_host->name);
- simix_global->current_process = process;
- xbt_context_schedule(process->simdata->context);
- /* fflush(NULL); */
- simix_global->current_process = NULL;
- }
-
- {
- surf_action_t action = NULL;
- surf_resource_t resource = NULL;
- smx_action_t smx_action = NULL;
-
- void *fun = NULL;
- void *arg = NULL;
-
- xbt_dynar_foreach(resource_list, i, resource) {
- if(xbt_swag_size(resource->common_public->states.failed_action_set) ||
- xbt_swag_size(resource->common_public->states.done_action_set)) {
- state_modifications = 1;
- }
- }
-
- if(!state_modifications) {
- DEBUG1("%f : Calling surf_solve",SIMIX_get_clock());
- elapsed_time = surf_solve();
- DEBUG1("Elapsed_time %f",elapsed_time);
- }
-
- while (surf_timer_resource->extension_public->get(&fun,(void*)&arg)) {
- DEBUG2("got %p %p", fun, arg);
- if(fun==SIMIX_process_create) {
- smx_process_arg_t args = arg;
- DEBUG2("Launching %s on %s", args->name, args->hostname);
- process = SIMIX_process_create(args->name, args->code,
- args->data, args->hostname,
- args->argc,args->argv,NULL);
- if(args->kill_time > SIMIX_get_clock()) {
- surf_timer_resource->extension_public->set(args->kill_time,
- (void*) &SIMIX_process_kill,
- (void*) process);
- }
- xbt_free(args);
- }
- if(fun==SIMIX_process_kill) {
- process = arg;
- DEBUG2("Killing %s on %s", process->name,
- process->simdata->s_host->name);
- SIMIX_process_kill(process);
- }
- }
-
- /* Wake up all process waiting for the action finish */
- xbt_dynar_foreach(resource_list, i, resource) {
- while ((action = xbt_swag_extract(resource->common_public->states.failed_action_set))) {
- smx_action = action->data;
- if (smx_action) {
- xbt_fifo_unshift(actions_failed,smx_action);
- }
- }
- while ((action =xbt_swag_extract(resource->common_public->states.done_action_set))) {
- smx_action = action->data;
- if (smx_action) {
- xbt_fifo_unshift(actions_done,smx_action);
- }
- }
- }
- }
- state_modifications = 0;
-
- if (elapsed_time == -1) {
- if (xbt_swag_size(simix_global->process_list) == 0) {
-/* INFO0("Congratulations ! Simulation terminated : all processes are over"); */
- } else {
- INFO0("Oops ! Deadlock or code not perfectly clean.");
- __SIMIX_display_process_status();
- if(XBT_LOG_ISENABLED(simix, xbt_log_priority_debug) ||
- XBT_LOG_ISENABLED(simix_kernel, xbt_log_priority_debug)) {
- DEBUG0("Aborting!");
- xbt_abort();
- }
- INFO0("Return a Warning.");
- }
- }
- return elapsed_time;
+ double time = 0;
+ smx_process_t process;
+ xbt_swag_t set;
+ surf_action_t action;
+ smx_timer_t timer;
+ surf_model_t model;
+ unsigned int iter;
+
+ do {
+ XBT_DEBUG("New Schedule Round; size(queue)=%lu",
+ xbt_dynar_length(simix_global->process_to_run));
+#ifdef TIME_BENCH_PER_SR
+ smx_ctx_raw_new_sr();
+#endif
+ while (!xbt_dynar_is_empty(simix_global->process_to_run)) {
+ XBT_DEBUG("New Sub-Schedule Round; size(queue)=%lu",
+ xbt_dynar_length(simix_global->process_to_run));
+
+ /* Run all processes that are ready to run, possibly in parallel */
+#ifdef TIME_BENCH_AMDAHL
+ xbt_os_timer_stop(simix_global->timer_seq);
+ xbt_os_timer_resume(simix_global->timer_par);
+#endif
+ SIMIX_process_runall();
+#ifdef TIME_BENCH_AMDAHL
+ xbt_os_timer_stop(simix_global->timer_par);
+ xbt_os_timer_resume(simix_global->timer_seq);
+#endif
+
+ /* Move all killing processes to the end of the list, because killing a process that has an ongoing simcall is a bad idea */
+ xbt_dynar_three_way_partition(simix_global->process_that_ran, process_syscall_color);
+
+ /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round */
+
+ /* WARNING, the order *must* be fixed or you'll jeopardize the simulation reproducibility (see RR-7653) */
+
+ /* Here, the order is ok because:
+ *
+ * Short proof: only maestro adds stuff to the process_to_run array, so the execution order of user contexts does not impact its order.
+ *
+ * Long proof: processes remain sorted through an arbitrary (implicit, complex but fixed) order in all cases.
+ *
+ * - if there is no kill during the simulation, processes remain sorted according to their PID.
+ * Rationale: This can be proved inductively.
+ * Assume that process_to_run is sorted at the beginning of a round (it is at round 0: the deployment file is parsed linearly).
+ * Let's show that it is still so at the end of this round.
+ * - if a process is added when being created, that's from maestro. It can be either at startup
+ * time (and then in PID order), or in response to a process_create simcall. Since simcalls are handled
+ * in arbitrary order (inductive hypothesis), we are fine.
+ * - If a process is added because it's getting killed, its subsequent actions shouldn't matter
+ * - If a process gets added to process_to_run because one of its blocking actions constituting the meat
+ * of a simcall terminates, we're still good. Proof:
+ * - You are added from SIMIX_simcall_answer() only. When this function is called depends on the resource
+ * kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications as an example.
+ * - For communications, this function is called from SIMIX_comm_finish().
+ * This function itself doesn't mess with the order since simcalls are handled in FIFO order.
+ * The function is called:
+ * - before the comm starts (invalid parameters, or resource already dead or whatever).
+ * The order then trivially holds since maestro didn't interrupt its handling of the simcall yet
+ * - because the communication failed or was canceled after startup. In this case, it's called from the function
+ * we are in, by the chunk:
+ * set = model->states.failed_action_set;
+ * while ((action = xbt_swag_extract(set)))
+ * SIMIX_simcall_post((smx_action_t) action->data);
+ * This order is also fixed because it depends on the order in which the surf actions were
+ * added to the system, and only maestro can add stuff this way, through simcalls.
+ * We thus use the inductive hypothesis once again to conclude that the order in which actions are
+ * popped out of the swag does not depend on the user code's execution order.
+ * - because the communication terminated. In this case, actions are served in the order given by
+ * set = model->states.done_action_set;
+ * while ((action = xbt_swag_extract(set)))
+ * SIMIX_simcall_post((smx_action_t) action->data);
+ * and the argument is very similar to the previous one.
+ * So, in any case, the orders of calls to SIMIX_comm_finish() do not depend on the order in which user processes are executed.
+ * So, in any case, the orders of processes within process_to_run do not depend on the order in which user processes were executed previously.
+ * So, if there is no killing in the simulation, the simulation reproducibility is not jeopardized.
+ * - If there are some process killings, the order is changed by this decision that comes from user-land
+ * But this decision may not have been motivated by a situation that was different because the simulation is not reproducible.
+ * So, even the order change induced by the process killing is perfectly reproducible.
+ *
+ * So science works, bitches [http://xkcd.com/54/].
+ *
+ * We could sort the process_that_ran array completely so that we can describe the order in which simcalls are handled
+ * (like "according to the PID of issuer"), but it's not mandatory (order is fixed already even if unfriendly).
+ * That would thus be a pure waste of time.
+ */
+
+ xbt_dynar_foreach(simix_global->process_that_ran, iter, process) {
+ if (process->simcall.call != SIMCALL_NONE) {
+ SIMIX_simcall_pre(&process->simcall, 0);
+ }
+ }
+ }
+
+ time = SIMIX_timer_next();
+ if (time != -1.0 || xbt_swag_size(simix_global->process_list) != 0)
+ time = surf_solve(time);
+
+ /* Notify all the hosts that have failed */
+ /* FIXME: iterate through the list of failed host and mark each of them */
+ /* as failed. On each host, signal all the running processes with host_fail */
+
+ /* Handle any pending timer */
+ while (xbt_heap_size(simix_timers) > 0 && SIMIX_get_clock() >= SIMIX_timer_next()) {
+ //FIXME: make the timers being real callbacks
+ // (i.e. provide dispatchers that read and expand the args)
+ timer = xbt_heap_pop(simix_timers);
+ if (timer->func)
+ ((void (*)(void*))timer->func)(timer->args);
+ xbt_free(timer);
+ }
+ /* Wake up all processes waiting for a Surf action to finish */
+ xbt_dynar_foreach(model_list, iter, model) {
+ set = model->states.failed_action_set;
+ while ((action = xbt_swag_extract(set)))
+ SIMIX_simcall_post((smx_action_t) action->data);
+ set = model->states.done_action_set;
+ while ((action = xbt_swag_extract(set)))
+ SIMIX_simcall_post((smx_action_t) action->data);
+ }
+
+ /* Clean processes to destroy */
+ SIMIX_process_empty_trash();
+
+ } while (time != -1.0 || !xbt_dynar_is_empty(simix_global->process_to_run));
+
+ if (xbt_swag_size(simix_global->process_list) != 0) {
+
+#ifdef HAVE_TRACING
+ TRACE_end();
+#endif
+
+ XBT_WARN("Oops ! Deadlock or code not perfectly clean.");
+ SIMIX_display_process_status();
+ xbt_abort();
+ }