Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix bug #14412 (Killing a SIMIX process just after migrate won't work)
[simgrid.git] / src / simix / smx_process.c
1 /* Copyright (c) 2007, 2008, 2009, 2010. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "smx_private.h"
8 #include "xbt/sysdep.h"
9 #include "xbt/log.h"
10 #include "xbt/dict.h"
11 #include "mc/mc.h"
12
13 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_process, simix,
14                                 "Logging specific to SIMIX (process)");
15
16 unsigned long simix_process_maxpid = 0;
17
18 /**
19  * \brief Returns the current agent.
20  *
21  * This functions returns the currently running SIMIX process.
22  *
23  * \return The SIMIX process
24  */
25 XBT_INLINE smx_process_t SIMIX_process_self(void)
26 {
27   smx_context_t self_context = SIMIX_context_self();
28
29   return self_context ? SIMIX_context_get_data(self_context) : NULL;
30 }
31
32 /**
33  * \brief Returns whether a process has pending asynchronous communications.
34  * \return true if there are asynchronous communications in this process
35  */
36 int SIMIX_process_has_pending_comms(smx_process_t process) {
37
38   return xbt_fifo_size(process->comms) > 0;
39 }
40
41 /**
42  * \brief Moves a process to the list of processes to destroy.
43  */
44 void SIMIX_process_cleanup(smx_process_t process)
45 {
46   XBT_DEBUG("Cleanup process %s (%p), waiting action %p",
47       process->name, process, process->waiting_action);
48
49   /* cancel non-blocking communications */
50   smx_action_t action;
51   while ((action = xbt_fifo_pop(process->comms))) {
52
53     /* make sure no one will finish the comm after this process is destroyed,
54      * because src_proc or dst_proc would be an invalid pointer */
55     SIMIX_comm_cancel(action);
56
57     if (action->comm.src_proc == process) {
58       XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d, src = %p, dst = %p",
59           action, action->comm.detached, (int)action->state, action->comm.src_proc, action->comm.dst_proc);
60       action->comm.src_proc = NULL;
61
62       if (action->comm.detached) {
63          if (action->comm.refcount == 0) {
64            XBT_DEBUG("Increase the refcount before destroying it since it's detached");
65            /* I'm not supposed to destroy a detached comm from the sender side,
66             * unless there is no receiver matching the rdv */
67            action->comm.refcount++;
68            SIMIX_comm_destroy(action);
69          }
70          else {
71            XBT_DEBUG("Don't destroy it since its refcount is %d", action->comm.refcount);
72          }
73       } else {
74         SIMIX_comm_destroy(action);
75       }
76     }
77     else if (action->comm.dst_proc == process){
78       XBT_DEBUG("Found an unfinished recv comm %p, state %d, src = %p, dst = %p",
79           action, (int)action->state, action->comm.src_proc, action->comm.dst_proc);
80       action->comm.dst_proc = NULL;
81
82       if (action->comm.detached && action->comm.refcount == 1
83           && action->comm.src_proc != NULL) {
84         /* the comm will be freed right now, remove it from the sender */
85         xbt_fifo_remove(action->comm.src_proc->comms, action);
86       }
87       SIMIX_comm_destroy(action);
88     }
89     else {
90       xbt_die("Communication action %p is in my list but I'm not the sender "
91           "or the receiver", action);
92     }
93   }
94
95   /*xbt_swag_remove(process, simix_global->process_to_run);*/
96   xbt_swag_remove(process, simix_global->process_list);
97   xbt_swag_remove(process, process->smx_host->process_list);
98   xbt_swag_insert(process, simix_global->process_to_destroy);
99   process->context->iwannadie = 0;
100 }
101
102 /** 
103  * Garbage collection
104  *
105  * Should be called some time to time to free the memory allocated for processes
106  * that have finished (or killed).
107  */
108 void SIMIX_process_empty_trash(void)
109 {
110   smx_process_t process = NULL;
111
112   while ((process = xbt_swag_extract(simix_global->process_to_destroy))) {
113     SIMIX_context_free(process->context);
114
115     /* Free the exception allocated at creation time */
116     free(process->running_ctx);
117     xbt_dict_free(&process->properties);
118
119     xbt_fifo_free(process->comms);
120
121     xbt_dynar_free(&process->on_exit);
122
123     free(process->name);
124     free(process);
125   }
126 }
127
128 /**
129  * \brief Creates and runs the maestro process
130  */
131 void SIMIX_create_maestro_process()
132 {
133   smx_process_t maestro = NULL;
134
135   /* Create maestro process and intilialize it */
136   maestro = xbt_new0(s_smx_process_t, 1);
137   maestro->pid = simix_process_maxpid++;
138   maestro->name = (char *) "";
139   maestro->running_ctx = xbt_new(xbt_running_ctx_t, 1);
140   XBT_RUNNING_CTX_INITIALIZE(maestro->running_ctx);
141   maestro->context = SIMIX_context_new(NULL, 0, NULL, NULL, maestro);
142   maestro->simcall.issuer = maestro;
143
144   simix_global->maestro_process = maestro;
145   return;
146 }
147
148 /**
149  * \brief Same as SIMIX_process_create() but with only one argument (used by timers).
150  * \return the process created
151  */
152 smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) {
153
154   smx_process_t process;
155   simix_global->create_process_function(
156       &process,
157       args->name,
158       args->code,
159       args->data,
160       args->hostname,
161       args->kill_time,
162       args->argc,
163       args->argv,
164       args->properties);
165
166   return process;
167 }
168
169 /**
170  * \brief Internal function to create a process.
171  *
172  * This function actually creates the process.
173  * It may be called when a SIMCALL_PROCESS_CREATE simcall occurs,
174  * or directly for SIMIX internal purposes. The sure thing is that it's called from maestro context.
175  *
176  * \return the process created
177  */
178 void SIMIX_process_create(smx_process_t *process,
179                           const char *name,
180                           xbt_main_func_t code,
181                           void *data,
182                           const char *hostname,
183                           double kill_time,
184                           int argc, char **argv,
185                           xbt_dict_t properties) {
186
187   *process = NULL;
188   smx_host_t host = SIMIX_host_get_by_name(hostname);
189
190   XBT_DEBUG("Start process %s on host %s", name, hostname);
191
192   if (!SIMIX_host_get_state(host)) {
193     XBT_WARN("Cannot launch process '%s' on failed host '%s'", name,
194           hostname);
195   }
196   else {
197     *process = xbt_new0(s_smx_process_t, 1);
198
199     xbt_assert(((code != NULL) && (host != NULL)), "Invalid parameters");
200
201     /* Process data */
202     (*process)->pid = simix_process_maxpid++;
203     (*process)->name = xbt_strdup(name);
204     (*process)->smx_host = host;
205     (*process)->data = data;
206     (*process)->comms = xbt_fifo_new();
207     (*process)->simcall.issuer = *process;
208
209     XBT_VERB("Create context %s", (*process)->name);
210     (*process)->context = SIMIX_context_new(code, argc, argv,
211       simix_global->cleanup_process_function, *process);
212
213     (*process)->running_ctx = xbt_new(xbt_running_ctx_t, 1);
214     XBT_RUNNING_CTX_INITIALIZE((*process)->running_ctx);
215
216     /* Add properties */
217     (*process)->properties = properties;
218
219     /* Add the process to it's host process list */
220     xbt_swag_insert(*process, host->process_list);
221
222     XBT_DEBUG("Start context '%s'", (*process)->name);
223
224     /* Now insert it in the global process list and in the process to run list */
225     xbt_swag_insert(*process, simix_global->process_list);
226     XBT_DEBUG("Inserting %s(%s) in the to_run list", (*process)->name, host->name);
227     xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, *process);
228   }
229
230   if (kill_time > SIMIX_get_clock()) {
231     if (simix_global->kill_process_function) {
232       XBT_DEBUG("Process %s(%s) will be kill at time %f", (*process)->name,
233           (*process)->smx_host->name, kill_time);
234       SIMIX_timer_set(kill_time, simix_global->kill_process_function, *process);
235     }
236   }
237 }
238
239 /**
240  * \brief Executes the processes from simix_global->process_to_run.
241  *
242  * The processes of simix_global->process_to_run are run (in parallel if
243  * possible).  On exit, simix_global->process_to_run is empty, and
244  * simix_global->process_that_ran contains the list of processes that just ran.
245  * The two lists are swapped so, be careful when using them before and after a
246  * call to this function.
247  */
248 void SIMIX_process_runall(void)
249 {
250   SIMIX_context_runall();
251
252   xbt_dynar_t tmp = simix_global->process_that_ran;
253   simix_global->process_that_ran = simix_global->process_to_run;
254   simix_global->process_to_run = tmp;
255   xbt_dynar_reset(simix_global->process_to_run);
256 }
257
258 /**
259  * \brief Internal function to kill a SIMIX process.
260  *
261  * This function may be called when a SIMCALL_PROCESS_KILL simcall occurs,
262  * or directly for SIMIX internal purposes.
263  *
264  * \param process poor victim
265  */
266 void SIMIX_process_kill(smx_process_t process) {
267
268   XBT_DEBUG("Killing process %s on %s", process->name, process->smx_host->name);
269
270   process->context->iwannadie = 1;
271   process->blocked = 0;
272   process->suspended = 0;
273   /* FIXME: set doexception to 0 also? */
274
275   /* destroy the blocking action if any */
276   if (process->waiting_action) {
277
278     switch (process->waiting_action->type) {
279
280       case SIMIX_ACTION_EXECUTE:          
281       case SIMIX_ACTION_PARALLEL_EXECUTE:
282         SIMIX_host_execution_destroy(process->waiting_action);
283         break;
284
285       case SIMIX_ACTION_COMMUNICATE:
286         xbt_fifo_remove(process->comms, process->waiting_action);
287         SIMIX_comm_destroy(process->waiting_action);
288         break;
289
290       case SIMIX_ACTION_SLEEP:
291   SIMIX_process_sleep_destroy(process->waiting_action);
292   break;
293
294       case SIMIX_ACTION_SYNCHRO:
295   SIMIX_synchro_stop_waiting(process, &process->simcall);
296   SIMIX_synchro_destroy(process->waiting_action);
297   break;
298
299       case SIMIX_ACTION_IO:
300         SIMIX_io_destroy(process->waiting_action);
301         break;
302     }
303   }
304   if(!xbt_dynar_member(simix_global->process_to_run, &(process)))
305     xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
306 }
307
308 /**
309  * \brief Kills all running processes.
310  * \param issuer this one will not be killed
311  */
312 void SIMIX_process_killall(smx_process_t issuer)
313 {
314   smx_process_t p = NULL;
315
316   while ((p = xbt_swag_extract(simix_global->process_list))) {
317     if (p != issuer) {
318       SIMIX_process_kill(p);
319     }
320   }
321
322   SIMIX_context_runall();
323
324   SIMIX_process_empty_trash();
325 }
326
327 void SIMIX_process_change_host(smx_process_t process,
328              smx_host_t dest)
329 {
330   xbt_assert((process != NULL), "Invalid parameters");
331   xbt_swag_remove(process, process->smx_host->process_list);
332   process->smx_host = dest;
333   xbt_swag_insert(process, dest->process_list);
334 }
335
336 void SIMIX_pre_process_change_host(smx_process_t process, smx_host_t dest)
337 {
338   process->new_host = dest;
339 }
340
341 void SIMIX_pre_process_suspend(smx_simcall_t simcall)
342 {
343   smx_process_t process = simcall->process_suspend.process;
344   smx_action_t action_suspend =
345       SIMIX_process_suspend(process, simcall->issuer);
346
347   if (process != simcall->issuer) {
348     SIMIX_simcall_answer(simcall);
349   } else {
350     xbt_fifo_push(action_suspend->simcalls, simcall);
351     process->waiting_action = action_suspend;
352     SIMIX_host_execution_suspend(process->waiting_action);
353   }
354   /* If we are suspending ourselves, then just do not finish the simcall now */
355 }
356
357 smx_action_t SIMIX_process_suspend(smx_process_t process, smx_process_t issuer)
358 {
359   xbt_assert((process != NULL), "Invalid parameters");
360
361   if (process->suspended) {
362     XBT_DEBUG("Process '%s' is already suspended", process->name);
363     return NULL;
364   }
365
366   process->suspended = 1;
367
368   /* If we are suspending another process, and it is waiting on an action,
369      suspend its action. */
370   if (process != issuer) {
371
372     if (process->waiting_action) {
373
374       switch (process->waiting_action->type) {
375
376         case SIMIX_ACTION_EXECUTE:
377         case SIMIX_ACTION_PARALLEL_EXECUTE:
378           SIMIX_host_execution_suspend(process->waiting_action);
379           break;
380
381         case SIMIX_ACTION_COMMUNICATE:
382           SIMIX_comm_suspend(process->waiting_action);
383           break;
384
385         case SIMIX_ACTION_SLEEP:
386           SIMIX_process_sleep_suspend(process->waiting_action);
387           break;
388
389         case SIMIX_ACTION_SYNCHRO:
390           /* Suspension is delayed to when the process is rescheduled. */
391           break;
392
393         default:
394           xbt_die("Internal error in SIMIX_process_suspend: unexpected action type %d",
395               (int)process->waiting_action->type);
396       }
397       return NULL;
398     } else {
399       /* Suspension is delayed to when the process is rescheduled. */
400       return NULL;
401     }
402   } else {
403     return SIMIX_host_execute("suspend", process->smx_host, 0.0, 1.0);
404   }
405 }
406
407 void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
408 {
409   xbt_assert((process != NULL), "Invalid parameters");
410
411   XBT_IN("process = %p, issuer = %p", process, issuer);
412
413   if(process->context->iwannadie) {
414     XBT_VERB("Ignoring request to suspend a process that is currently dying.");
415     return;
416   }
417
418   if(!process->suspended) return;
419   process->suspended = 0;
420
421   /* If we are resuming another process, resume the action it was waiting for
422      if any. Otherwise add it to the list of process to run in the next round. */
423   if (process != issuer) {
424
425     if (process->waiting_action) {
426
427       switch (process->waiting_action->type) {
428
429         case SIMIX_ACTION_EXECUTE:          
430         case SIMIX_ACTION_PARALLEL_EXECUTE:
431           SIMIX_host_execution_resume(process->waiting_action);
432           break;
433
434         case SIMIX_ACTION_COMMUNICATE:
435           SIMIX_comm_resume(process->waiting_action);
436           break;
437
438         case SIMIX_ACTION_SLEEP:
439           SIMIX_process_sleep_resume(process->waiting_action);
440           break;
441
442         case SIMIX_ACTION_SYNCHRO:
443           /* I cannot resume it now. This is delayed to when the process is rescheduled at
444            * the end of the synchro. */
445           break;
446
447         default:
448           xbt_die("Internal error in SIMIX_process_resume: unexpected action type %d",
449               (int)process->waiting_action->type);
450       }
451     }
452   } else XBT_WARN("Strange. Process %p is trying to resume himself.", issuer);
453
454   XBT_OUT();
455 }
456
457 int SIMIX_process_get_maxpid(void) {
458   return simix_process_maxpid;
459 }
460
461 int SIMIX_process_count(void)
462 {
463   return xbt_swag_size(simix_global->process_list);
464 }
465
466 void* SIMIX_process_self_get_data(smx_process_t self)
467 {
468   xbt_assert(self == SIMIX_process_self(), "This is not the current process");
469
470   if (!self) {
471     return NULL;
472   }
473   return SIMIX_process_get_data(self);
474 }
475
476 void SIMIX_process_self_set_data(smx_process_t self, void *data)
477 {
478   xbt_assert(self == SIMIX_process_self(), "This is not the current process");
479
480   SIMIX_process_set_data(self, data);
481 }
482
483 void* SIMIX_process_get_data(smx_process_t process)
484 {
485   return process->data;
486 }
487
488 void SIMIX_process_set_data(smx_process_t process, void *data)
489 {
490   process->data = data;
491 }
492
493 smx_host_t SIMIX_process_get_host(smx_process_t process)
494 {
495   return process->smx_host;
496 }
497
498 /* needs to be public and without simcall because it is called
499    by exceptions and logging events */
500 const char* SIMIX_process_self_get_name(void) {
501
502   smx_process_t process = SIMIX_process_self();
503   if (process == NULL || process == simix_global->maestro_process)
504     return "";
505
506   return SIMIX_process_get_name(process);
507 }
508
509 const char* SIMIX_process_get_name(smx_process_t process)
510 {
511   return process->name;
512 }
513
514 smx_process_t SIMIX_process_get_by_name(const char* name)
515 {
516   smx_process_t proc;
517
518   xbt_swag_foreach(proc, simix_global->process_list)
519   {
520     if(!strcmp(name, proc->name))
521       return proc;
522   }
523   return NULL;
524 }
525
526 int SIMIX_process_is_suspended(smx_process_t process)
527 {
528   return process->suspended;
529 }
530
531 xbt_dict_t SIMIX_process_get_properties(smx_process_t process)
532 {
533   return process->properties;
534 }
535
536 void SIMIX_pre_process_sleep(smx_simcall_t simcall)
537 {
538   if (MC_IS_ENABLED) {
539     MC_process_clock_add(simcall->issuer, simcall->process_sleep.duration);
540     simcall->process_sleep.result = SIMIX_DONE;
541     SIMIX_simcall_answer(simcall);
542     return;
543   }
544   smx_action_t action = SIMIX_process_sleep(simcall->issuer, simcall->process_sleep.duration);
545   xbt_fifo_push(action->simcalls, simcall);
546   simcall->issuer->waiting_action = action;
547 }
548
549 smx_action_t SIMIX_process_sleep(smx_process_t process, double duration)
550 {
551   smx_action_t action;
552   smx_host_t host = process->smx_host;
553
554   /* check if the host is active */
555   if (surf_workstation_model->extension.
556       workstation.get_state(host->host) != SURF_RESOURCE_ON) {
557     THROWF(host_error, 0, "Host %s failed, you cannot call this function",
558            host->name);
559   }
560
561   action = xbt_mallocator_get(simix_global->action_mallocator);
562   action->type = SIMIX_ACTION_SLEEP;
563   action->name = NULL;
564 #ifdef HAVE_TRACING
565   action->category = NULL;
566 #endif
567
568   action->sleep.host = host;
569   action->sleep.surf_sleep =
570       surf_workstation_model->extension.workstation.sleep(host->host, duration);
571
572   surf_workstation_model->action_data_set(action->sleep.surf_sleep, action);
573   XBT_DEBUG("Create sleep action %p", action);
574
575   return action;
576 }
577
578 void SIMIX_post_process_sleep(smx_action_t action)
579 {
580   smx_simcall_t simcall;
581   e_smx_state_t state;
582
583   while ((simcall = xbt_fifo_shift(action->simcalls))) {
584
585     switch(surf_workstation_model->action_state_get(action->sleep.surf_sleep)){
586       case SURF_ACTION_FAILED:
587         SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
588         break;
589
590       case SURF_ACTION_DONE:
591         state = SIMIX_DONE;
592         break;
593
594       default:
595         THROW_IMPOSSIBLE;
596         break;
597     }
598     simcall->process_sleep.result = state;
599     simcall->issuer->waiting_action = NULL;
600     SIMIX_simcall_answer(simcall);
601   }
602   SIMIX_process_sleep_destroy(action);
603 }
604
605 void SIMIX_process_sleep_destroy(smx_action_t action)
606 {
607   XBT_DEBUG("Destroy action %p", action);
608   if (action->sleep.surf_sleep)
609     action->sleep.surf_sleep->model_type->action_unref(action->sleep.surf_sleep);
610   xbt_mallocator_release(simix_global->action_mallocator, action);
611 }
612
613 void SIMIX_process_sleep_suspend(smx_action_t action)
614 {
615   surf_workstation_model->suspend(action->sleep.surf_sleep);
616 }
617
618 void SIMIX_process_sleep_resume(smx_action_t action)
619 {
620   surf_workstation_model->resume(action->sleep.surf_sleep);
621 }
622
623 /** 
624  * \brief Calling this function makes the process to yield.
625  *
626  * Only the current process can call this function, giving back the control to
627  * maestro.
628  *
629  * \param self the current process
630  */
631 void SIMIX_process_yield(smx_process_t self)
632 {
633   XBT_DEBUG("Yield process '%s'", self->name);
634
635   /* Go into sleep and return control to maestro */
636   SIMIX_context_suspend(self->context);
637
638   /* Ok, maestro returned control to us */
639   XBT_DEBUG("Control returned to me: '%s'", self->name);
640
641   if (self->new_host) {
642     SIMIX_process_change_host(self, self->new_host);
643     self->new_host = NULL;
644   }
645
646   if (self->context->iwannadie){
647     XBT_DEBUG("I wanna die!");
648     SIMIX_context_stop(self->context);
649   }
650
651   if(self->suspended) {
652     xbt_assert(!self->doexception, "Gloups! This exception may be lost by subsequent calls.");
653     self->suspended = 0;
654     SIMIX_process_suspend(self,self);
655   }
656
657   if (self->doexception) {
658     XBT_DEBUG("Wait, maestro left me an exception");
659     self->doexception = 0;
660     SMX_THROW();
661   }
662 }
663
664 /* callback: context fetching */
665 xbt_running_ctx_t *SIMIX_process_get_running_context(void)
666 {
667   return SIMIX_process_self()->running_ctx;
668 }
669
670 /* callback: termination */
671 void SIMIX_process_exception_terminate(xbt_ex_t * e)
672 {
673   xbt_ex_display(e);
674   abort();
675 }
676
677 smx_context_t SIMIX_process_get_context(smx_process_t p) {
678   return p->context;
679 }
680
681 void SIMIX_process_set_context(smx_process_t p,smx_context_t c) {
682   p->context = c;
683 }
684
685 /**
686  * \brief Returns the list of processes to run.
687  */
688 xbt_dynar_t SIMIX_process_get_runnable(void)
689 {
690   return simix_global->process_to_run;
691 }
692
693 /**
694  * \brief Returns the process from PID.
695  */
696 smx_process_t SIMIX_process_from_PID(int PID)
697 {
698   smx_process_t proc;
699   xbt_swag_foreach(proc, simix_global->process_list)
700   {
701    if(proc->pid == PID)
702    return proc;
703   }
704   return NULL;
705 }
706
707 /** @brief returns a dynar containg all currently existing processes */
708 xbt_dynar_t SIMIX_processes_as_dynar(void) {
709   smx_process_t proc;
710   xbt_dynar_t res = xbt_dynar_new(sizeof(smx_process_t),NULL);
711   xbt_swag_foreach(proc, simix_global->process_list) {
712     xbt_dynar_push(res,&proc);
713   }
714   return res;
715 }
716 void SIMIX_process_on_exit_runall(smx_process_t process) {
717   int cpt;
718   if (!process->on_exit) {
719     return;
720   }
721
722   smx_process_exit_fun_t exit_fun;
723
724   for (cpt = xbt_dynar_length(process->on_exit) - 1; cpt >= 0; cpt--) {
725     exit_fun = xbt_dynar_get_ptr(process->on_exit, cpt);
726     (exit_fun->fun)(exit_fun->arg);
727   }
728 }
729 void SIMIX_process_on_exit(int_f_pvoid_t fun, void *data) {
730   smx_process_t process = SIMIX_process_self();
731   xbt_assert(process, "current process not found: are you in maestro context ?");
732
733   if (!process->on_exit) {
734     process->on_exit = xbt_dynar_new(sizeof(s_smx_process_exit_fun_t), NULL);
735   }
736
737   s_smx_process_exit_fun_t exit_fun = {fun, data};
738
739   xbt_dynar_push_as(process->on_exit,s_smx_process_exit_fun_t,exit_fun);
740 }