Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix possible crashes and leaks with dsends during processes cleanup
[simgrid.git] / src / simix / smx_process.c
1 /* Copyright (c) 2007, 2008, 2009, 2010. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "private.h"
8 #include "xbt/sysdep.h"
9 #include "xbt/log.h"
10 #include "xbt/dict.h"
11 #include "msg/mailbox.h"
12 #include "mc/mc.h"
13
14 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_process, simix,
15                                 "Logging specific to SIMIX (process)");
16
17 unsigned long simix_process_maxpid = 0;
18
19 /**
20  * \brief Returns the current agent.
21  *
22  * This functions returns the currently running SIMIX process.
23  *
24  * \return The SIMIX process
25  */
26 XBT_INLINE smx_process_t SIMIX_process_self(void)
27 {
28   smx_context_t self_context = SIMIX_context_self();
29
30   return self_context ? SIMIX_context_get_data(self_context) : NULL;
31 }
32
33 /**
34  * \brief Move a process to the list of processes to destroy.
35  */
36 void SIMIX_process_cleanup(smx_process_t process)
37 {
38   XBT_DEBUG("Cleanup process %s (%p), waiting action %p",
39       process->name, process, process->waiting_action);
40
41   /* cancel non-blocking communications */
42   smx_action_t action;
43   while ((action = xbt_fifo_pop(process->comms))) {
44
45     /* make sure no one will finish the comm after this process is destroyed */
46     SIMIX_comm_cancel(action);
47
48     if (action->comm.src_proc == process) {
49       XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d, src = %p, dst = %p",
50           action, action->comm.detached, action->state, action->comm.src_proc, action->comm.dst_proc);
51       action->comm.src_proc = NULL;
52
53       if (action->comm.detached) {
54          if (action->comm.refcount == 0) {
55            /* I'm not supposed to destroy a detached comm from the sender side,
56             * unless there is no receiver matching the rdv */
57            action->comm.refcount++;
58            SIMIX_comm_destroy(action);
59          }
60       }
61       else {
62         SIMIX_comm_destroy(action);
63       }
64     }
65     else if (action->comm.dst_proc == process){
66       XBT_DEBUG("Found an unfinished recv comm %p, state %d, src = %p, dst = %p",
67           action, action->state, action->comm.src_proc, action->comm.dst_proc);
68       action->comm.dst_proc = NULL;
69
70       if (action->comm.detached && action->comm.refcount == 1
71           && action->comm.src_proc != NULL) {
72         /* the comm will be freed right now, remove it from the sender */
73         xbt_fifo_remove(action->comm.src_proc->comms, action);
74       }
75       SIMIX_comm_destroy(action);
76     }
77     else {
78       xbt_die("Communication action %p is in my list but I'm not the sender "
79           "or the receiver", action);
80     }
81   }
82
83   /*xbt_swag_remove(process, simix_global->process_to_run);*/
84   xbt_swag_remove(process, simix_global->process_list);
85   xbt_swag_remove(process, process->smx_host->process_list);
86   xbt_swag_insert(process, simix_global->process_to_destroy);
87 }
88
89 /** 
90  * Garbage collection
91  *
92  * Should be called some time to time to free the memory allocated for processes
93  * that have finished (or killed).
94  */
95 void SIMIX_process_empty_trash(void)
96 {
97   smx_process_t process = NULL;
98
99   while ((process = xbt_swag_extract(simix_global->process_to_destroy))) {
100     SIMIX_context_free(process->context);
101
102     /* Free the exception allocated at creation time */
103     if (process->running_ctx)
104       free(process->running_ctx);
105     if (process->properties)
106       xbt_dict_free(&process->properties);
107
108     xbt_fifo_free(process->comms);
109
110     free(process->name);
111     process->name = NULL;
112     free(process);
113   }
114 }
115
116 /**
117  * \brief Creates and runs the maestro process
118  */
119 void SIMIX_create_maestro_process()
120 {
121   smx_process_t maestro = NULL;
122
123   /* Create maestro process and intilialize it */
124   maestro = xbt_new0(s_smx_process_t, 1);
125   maestro->pid = simix_process_maxpid++;
126   maestro->name = (char *) "";
127   maestro->running_ctx = xbt_new(xbt_running_ctx_t, 1);
128   XBT_RUNNING_CTX_INITIALIZE(maestro->running_ctx);
129   maestro->context = SIMIX_context_new(NULL, 0, NULL, NULL, maestro);
130   maestro->request.issuer = maestro;
131
132   simix_global->maestro_process = maestro;
133   return;
134 }
135
136 /**
137  * \brief Same as SIMIX_process_create() but with only one argument (used by timers).
138  * \return the process created
139  */
140 smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) {
141
142   smx_process_t process;
143   simix_global->create_process_function(
144       &process,
145       args->name,
146       args->code,
147       args->data,
148       args->hostname,
149       args->argc,
150       args->argv,
151       args->properties);
152
153   return process;
154 }
155
156 /**
157  * \brief Internal function to create a process.
158  *
159  * This function actually creates the process.
160  * It may be called when a REQ_PROCESS_CREATE request occurs,
161  * or directly for SIMIX internal purposes.
162  *
163  * \return the process created
164  */
165 void SIMIX_process_create(smx_process_t *process,
166                           const char *name,
167                           xbt_main_func_t code,
168                           void *data,
169                           const char *hostname,
170                           int argc, char **argv,
171                           xbt_dict_t properties) {
172
173   *process = NULL;
174   smx_host_t host = SIMIX_host_get_by_name(hostname);
175
176   XBT_DEBUG("Start process %s on host %s", name, hostname);
177
178   if (!SIMIX_host_get_state(host)) {
179     XBT_WARN("Cannot launch process '%s' on failed host '%s'", name,
180           hostname);
181   }
182   else {
183     *process = xbt_new0(s_smx_process_t, 1);
184
185     xbt_assert(((code != NULL) && (host != NULL)), "Invalid parameters");
186
187     /* Process data */
188     (*process)->pid = simix_process_maxpid++;
189     (*process)->name = xbt_strdup(name);
190     (*process)->smx_host = host;
191     (*process)->data = data;
192     (*process)->comms = xbt_fifo_new();
193
194     XBT_VERB("Create context %s", (*process)->name);
195     (*process)->context = SIMIX_context_new(code, argc, argv,
196         simix_global->cleanup_process_function, *process);
197
198     (*process)->running_ctx = xbt_new(xbt_running_ctx_t, 1);
199     XBT_RUNNING_CTX_INITIALIZE((*process)->running_ctx);
200
201     /* Add properties */
202     (*process)->properties = properties;
203
204     /* Add the process to it's host process list */
205     xbt_swag_insert(*process, host->process_list);
206
207     XBT_DEBUG("Start context '%s'", (*process)->name);
208
209     /* Now insert it in the global process list and in the process to run list */
210     xbt_swag_insert(*process, simix_global->process_list);
211     XBT_DEBUG("Inserting %s(%s) in the to_run list", (*process)->name, host->name);
212     xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, *process);
213   }
214 }
215
216 /**
217  * \brief Executes the processes from simix_global->process_to_run.
218  *
219  * The processes of simix_global->process_to_run are run (in parallel if
220  * possible).  On exit, simix_global->process_to_run is empty, and
221  * simix_global->process_that_ran contains the list of processes that just ran.
222  * The two lists are swapped so, be careful when using them before and after a
223  * call to this function.
224  */
225 void SIMIX_process_runall(void)
226 {
227   SIMIX_context_runall();
228
229   xbt_dynar_t tmp = simix_global->process_that_ran;
230   simix_global->process_that_ran = simix_global->process_to_run;
231   simix_global->process_to_run = tmp;
232   xbt_dynar_reset(simix_global->process_to_run);
233 }
234
235 /**
236  * \brief Internal function to kill a SIMIX process.
237  *
238  * This function may be called when a REQ_PROCESS_KILL request occurs,
239  * or directly for SIMIX internal purposes.
240  *
241  * \param process poor victim
242  */
243 void SIMIX_process_kill(smx_process_t process) {
244
245   XBT_DEBUG("Killing process %s on %s", process->name, process->smx_host->name);
246
247   process->context->iwannadie = 1;
248   process->blocked = 0;
249   process->suspended = 0;
250   /* FIXME: set doexception to 0 also? */
251
252   /* destroy the blocking action if any */
253   if (process->waiting_action) {
254
255     switch (process->waiting_action->type) {
256
257       case SIMIX_ACTION_EXECUTE:          
258       case SIMIX_ACTION_PARALLEL_EXECUTE:
259         SIMIX_host_execution_destroy(process->waiting_action);
260         break;
261
262       case SIMIX_ACTION_COMMUNICATE:
263         SIMIX_comm_destroy(process->waiting_action);
264         break;
265
266       case SIMIX_ACTION_SLEEP:
267         SIMIX_process_sleep_destroy(process->waiting_action);
268         break;
269
270       case SIMIX_ACTION_SYNCHRO:
271         SIMIX_synchro_stop_waiting(process, &process->request);
272         SIMIX_synchro_destroy(process->waiting_action);
273         break;
274
275       case SIMIX_ACTION_IO:
276         THROW_UNIMPLEMENTED;
277         break;
278     }
279   }
280
281   xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
282 }
283
284 /**
285  * \brief Kills all running processes.
286  * \param issuer this one will not be killed
287  */
288 void SIMIX_process_killall(smx_process_t issuer)
289 {
290   smx_process_t p = NULL;
291
292   while ((p = xbt_swag_extract(simix_global->process_list))) {
293     if (p != issuer) {
294       SIMIX_process_kill(p);
295     }
296   }
297
298   SIMIX_context_runall(simix_global->process_to_run);
299
300   SIMIX_process_empty_trash();
301 }
302
303 void SIMIX_process_change_host(smx_process_t process,
304                                smx_host_t dest)
305 {
306   xbt_assert((process != NULL), "Invalid parameters");
307   xbt_swag_remove(process, process->smx_host->process_list);
308   process->smx_host = dest;
309   xbt_swag_insert(process, dest->process_list);
310 }
311
312 void SIMIX_pre_process_change_host(smx_process_t process, smx_host_t dest)
313 {
314   process->new_host = dest;
315 }
316
317 void SIMIX_pre_process_suspend(smx_req_t req)
318 {
319   smx_process_t process = req->process_suspend.process;
320   SIMIX_process_suspend(process, req->issuer);
321
322   if (process != req->issuer) {
323     SIMIX_request_answer(req);
324   }
325   /* If we are suspending ourselves, then just do not replay the request. */
326 }
327
328 void SIMIX_process_suspend(smx_process_t process, smx_process_t issuer)
329 {
330   xbt_assert((process != NULL), "Invalid parameters");
331
332   if (process->suspended) {
333     XBT_DEBUG("Process '%s' is already suspended", process->name);
334     return;
335   }
336
337   process->suspended = 1;
338
339   /* If we are suspending another process, and it is waiting on an action,
340      suspend its action. */
341   if (process != issuer) {
342
343     if (process->waiting_action) {
344
345       switch (process->waiting_action->type) {
346
347         case SIMIX_ACTION_EXECUTE:
348         case SIMIX_ACTION_PARALLEL_EXECUTE:
349           SIMIX_host_execution_suspend(process->waiting_action);
350           break;
351
352         case SIMIX_ACTION_COMMUNICATE:
353           SIMIX_comm_suspend(process->waiting_action);
354           break;
355
356         case SIMIX_ACTION_SLEEP:
357           SIMIX_process_sleep_suspend(process->waiting_action);
358           break;
359
360         default:
361           xbt_die("Internal error in SIMIX_process_suspend: unexpected action type %d",
362               process->waiting_action->type);
363       }
364     }
365   }
366 }
367
368 void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
369 {
370   xbt_assert((process != NULL), "Invalid parameters");
371
372   if (!process->suspended) {
373     XBT_DEBUG("Process '%s' is not suspended", process->name);
374     return;
375   }
376
377   process->suspended = 0;
378
379   /* If we are resuming another process, resume the action it was waiting for
380      if any. Otherwise add it to the list of process to run in the next round. */
381   if (process != issuer) {
382
383     if (process->waiting_action) {
384
385       switch (process->waiting_action->type) {
386
387         case SIMIX_ACTION_EXECUTE:          
388         case SIMIX_ACTION_PARALLEL_EXECUTE:
389           SIMIX_host_execution_resume(process->waiting_action);
390           break;
391
392         case SIMIX_ACTION_COMMUNICATE:
393           SIMIX_comm_resume(process->waiting_action);
394           break;
395
396         case SIMIX_ACTION_SLEEP:
397           SIMIX_process_sleep_resume(process->waiting_action);
398           break;
399
400         default:
401           xbt_die("Internal error in SIMIX_process_resume: unexpected action type %d",
402               process->waiting_action->type);
403       }
404     }
405     else {
406       xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
407     }
408   }
409 }
410
411 int SIMIX_process_get_maxpid(void) {
412   return simix_process_maxpid;
413 }
414
415 int SIMIX_process_count(void)
416 {
417   return xbt_swag_size(simix_global->process_list);
418 }
419
420 void* SIMIX_process_self_get_data(void)
421 {
422   smx_process_t me = SIMIX_process_self();
423   if (!me) {
424     return NULL;
425   }
426   return SIMIX_process_get_data(me);
427 }
428
429 void SIMIX_process_self_set_data(void *data)
430 {
431   SIMIX_process_set_data(SIMIX_process_self(), data);
432 }
433
434 void* SIMIX_process_get_data(smx_process_t process)
435 {
436   return process->data;
437 }
438
439 void SIMIX_process_set_data(smx_process_t process, void *data)
440 {
441   process->data = data;
442 }
443
444 smx_host_t SIMIX_process_get_host(smx_process_t process)
445 {
446   return process->smx_host;
447 }
448
449 /* needs to be public and without request because it is called
450    by exceptions and logging events */
451 const char* SIMIX_process_self_get_name(void) {
452
453   smx_process_t process = SIMIX_process_self();
454   if (process == NULL || process == simix_global->maestro_process)
455     return "";
456
457   return SIMIX_process_get_name(process);
458 }
459
460 const char* SIMIX_process_get_name(smx_process_t process)
461 {
462   return process->name;
463 }
464
465 smx_process_t SIMIX_process_get_by_name(const char* name)
466 {
467   smx_process_t proc;
468
469   xbt_swag_foreach(proc, simix_global->process_list)
470   {
471     if(!strcmp(name, proc->name))
472       return proc;
473   }
474   return NULL;
475 }
476
477 int SIMIX_process_is_suspended(smx_process_t process)
478 {
479   return process->suspended;
480 }
481
482 xbt_dict_t SIMIX_process_get_properties(smx_process_t process)
483 {
484   return process->properties;
485 }
486
487 void SIMIX_pre_process_sleep(smx_req_t req)
488 {
489   if (MC_IS_ENABLED) {
490     MC_process_clock_add(req->issuer, req->process_sleep.duration);
491     req->process_sleep.result = SIMIX_DONE;
492     SIMIX_request_answer(req);
493     return;
494   }
495   smx_action_t action = SIMIX_process_sleep(req->issuer, req->process_sleep.duration);
496   xbt_fifo_push(action->request_list, req);
497   req->issuer->waiting_action = action;
498 }
499
500 smx_action_t SIMIX_process_sleep(smx_process_t process, double duration)
501 {
502   smx_action_t action;
503   smx_host_t host = process->smx_host;
504
505   /* check if the host is active */
506   if (surf_workstation_model->extension.
507       workstation.get_state(host->host) != SURF_RESOURCE_ON) {
508     THROWF(host_error, 0, "Host %s failed, you cannot call this function",
509            host->name);
510   }
511
512   action = xbt_mallocator_get(simix_global->action_mallocator);
513   action->type = SIMIX_ACTION_SLEEP;
514   action->name = NULL;
515 #ifdef HAVE_TRACING
516   action->category = NULL;
517 #endif
518
519   action->sleep.host = host;
520   action->sleep.surf_sleep =
521       surf_workstation_model->extension.workstation.sleep(host->host, duration);
522
523   surf_workstation_model->action_data_set(action->sleep.surf_sleep, action);
524   XBT_DEBUG("Create sleep action %p", action);
525
526   return action;
527 }
528
529 void SIMIX_post_process_sleep(smx_action_t action)
530 {
531   smx_req_t req;
532   e_smx_state_t state;
533
534   while ((req = xbt_fifo_shift(action->request_list))) {
535
536     switch(surf_workstation_model->action_state_get(action->sleep.surf_sleep)){
537       case SURF_ACTION_FAILED:
538         state = SIMIX_SRC_HOST_FAILURE;
539         break;
540
541       case SURF_ACTION_DONE:
542         state = SIMIX_DONE;
543         break;
544
545       default:
546         THROW_IMPOSSIBLE;
547         break;
548     }
549     req->process_sleep.result = state;
550     req->issuer->waiting_action = NULL;
551     SIMIX_request_answer(req);
552   }
553   SIMIX_process_sleep_destroy(action);
554 }
555
556 void SIMIX_process_sleep_destroy(smx_action_t action)
557 {
558   XBT_DEBUG("Destroy action %p", action);
559   if (action->sleep.surf_sleep)
560     action->sleep.surf_sleep->model_type->action_unref(action->sleep.surf_sleep);
561   xbt_mallocator_release(simix_global->action_mallocator, action);
562 }
563
564 void SIMIX_process_sleep_suspend(smx_action_t action)
565 {
566   surf_workstation_model->suspend(action->sleep.surf_sleep);
567 }
568
569 void SIMIX_process_sleep_resume(smx_action_t action)
570 {
571   surf_workstation_model->resume(action->sleep.surf_sleep);
572 }
573
574 /** 
575  * Calling this function makes the process to yield.
576  * Only the processes can call this function, giving back the control to maestro
577  */
578 void SIMIX_process_yield(void)
579 {
580   smx_process_t self = SIMIX_process_self();
581
582   XBT_DEBUG("Yield process '%s'", self->name);
583
584   /* Go into sleep and return control to maestro */
585   SIMIX_context_suspend(self->context);
586
587   /* Ok, maestro returned control to us */
588   XBT_DEBUG("Control returned to me: '%s'", self->name);
589
590   if (self->context->iwannadie){
591     XBT_DEBUG("I wanna die!");
592     SIMIX_context_stop(self->context);
593   }
594
595   if (self->doexception) {
596     XBT_DEBUG("Wait, maestro left me an exception");
597     self->doexception = 0;
598     RETHROW;
599   }
600   
601   if (self->new_host) {
602     SIMIX_process_change_host(self, self->new_host);
603     self->new_host = NULL;
604   }
605 }
606
607 /* callback: context fetching */
608 xbt_running_ctx_t *SIMIX_process_get_running_context(void)
609 {
610   return SIMIX_process_self()->running_ctx;
611 }
612
613 /* callback: termination */
614 void SIMIX_process_exception_terminate(xbt_ex_t * e)
615 {
616   xbt_ex_display(e);
617   abort();
618 }
619
620 smx_context_t SIMIX_process_get_context(smx_process_t p) {
621   return p->context;
622 }
623
624 void SIMIX_process_set_context(smx_process_t p,smx_context_t c) {
625   p->context = c;
626 }