Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
When a process leaves, no one should ever finish a communication with it
[simgrid.git] / src / simix / smx_process.c
1 /* Copyright (c) 2007, 2008, 2009, 2010. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "private.h"
8 #include "xbt/sysdep.h"
9 #include "xbt/log.h"
10 #include "xbt/dict.h"
11 #include "mc/mc.h"
12
/* Logging category for this file; presumably the one used by the XBT_DEBUG /
 * XBT_VERB / XBT_WARN calls below (confirm against xbt/log.h). */
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_process, simix,
                                "Logging specific to SIMIX (process)");

/* Next PID to hand out: it is read and post-incremented every time a process
 * (maestro included) is created in this file. */
unsigned long simix_process_maxpid = 0;
17
18 /**
19  * \brief Returns the current agent.
20  *
21  * This functions returns the currently running SIMIX process.
22  *
23  * \return The SIMIX process
24  */
25 XBT_INLINE smx_process_t SIMIX_process_self(void)
26 {
27   smx_context_t self_context = SIMIX_context_self();
28
29   return self_context ? SIMIX_context_get_data(self_context) : NULL;
30 }
31
32 /**
33  * \brief Move a process to the list of processes to destroy.
34  */
35 void SIMIX_process_cleanup(smx_process_t process)
36 {
37   XBT_DEBUG("Cleanup process %s (%p), waiting action %p",
38       process->name, process, process->waiting_action);
39
40   /* cancel non-blocking communications */
41   smx_action_t action;
42   while ((action = xbt_fifo_pop(process->comms))) {
43
44     /* make sure no one will finish the comm after this process is destroyed,
45      * because src_proc or dst_proc would be an invalid pointer */
46     SIMIX_comm_cancel(action);
47
48     if (action->comm.src_proc == process) {
49       XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d, src = %p, dst = %p",
50           action, action->comm.detached, action->state, action->comm.src_proc, action->comm.dst_proc);
51       action->comm.src_proc = NULL;
52
53       if (action->comm.detached) {
54          if (action->comm.refcount == 0) {
55            XBT_DEBUG("Increase the refcount before destroying it since it's detached");
56            /* I'm not supposed to destroy a detached comm from the sender side,
57             * unless there is no receiver matching the rdv */
58            action->comm.refcount++;
59            SIMIX_comm_destroy(action);
60          }
61          else {
62            XBT_DEBUG("Don't destroy it since its refcount is %d", action->comm.refcount);
63          }
64       } else {
65         SIMIX_comm_destroy(action);
66       }
67     }
68     else if (action->comm.dst_proc == process){
69       XBT_DEBUG("Found an unfinished recv comm %p, state %d, src = %p, dst = %p",
70           action, action->state, action->comm.src_proc, action->comm.dst_proc);
71       action->comm.dst_proc = NULL;
72
73       if (action->comm.detached && action->comm.refcount == 1
74           && action->comm.src_proc != NULL) {
75         /* the comm will be freed right now, remove it from the sender */
76         xbt_fifo_remove(action->comm.src_proc->comms, action);
77       }
78       SIMIX_comm_destroy(action);
79     }
80     else {
81       xbt_die("Communication action %p is in my list but I'm not the sender "
82           "or the receiver", action);
83     }
84   }
85
86   /*xbt_swag_remove(process, simix_global->process_to_run);*/
87   xbt_swag_remove(process, simix_global->process_list);
88   xbt_swag_remove(process, process->smx_host->process_list);
89   xbt_swag_insert(process, simix_global->process_to_destroy);
90 }
91
92 /** 
93  * Garbage collection
94  *
95  * Should be called some time to time to free the memory allocated for processes
96  * that have finished (or killed).
97  */
98 void SIMIX_process_empty_trash(void)
99 {
100   smx_process_t process = NULL;
101
102   while ((process = xbt_swag_extract(simix_global->process_to_destroy))) {
103     SIMIX_context_free(process->context);
104
105     /* Free the exception allocated at creation time */
106     free(process->running_ctx);
107     xbt_dict_free(&process->properties);
108
109     xbt_fifo_free(process->comms);
110
111     free(process->name);
112     free(process);
113   }
114 }
115
116 /**
117  * \brief Creates and runs the maestro process
118  */
119 void SIMIX_create_maestro_process()
120 {
121   smx_process_t maestro = NULL;
122
123   /* Create maestro process and intilialize it */
124   maestro = xbt_new0(s_smx_process_t, 1);
125   maestro->pid = simix_process_maxpid++;
126   maestro->name = (char *) "";
127   maestro->running_ctx = xbt_new(xbt_running_ctx_t, 1);
128   XBT_RUNNING_CTX_INITIALIZE(maestro->running_ctx);
129   maestro->context = SIMIX_context_new(NULL, 0, NULL, NULL, maestro);
130   maestro->request.issuer = maestro;
131
132   simix_global->maestro_process = maestro;
133   return;
134 }
135
136 /**
137  * \brief Same as SIMIX_process_create() but with only one argument (used by timers).
138  * \return the process created
139  */
140 smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) {
141
142   smx_process_t process;
143   simix_global->create_process_function(
144       &process,
145       args->name,
146       args->code,
147       args->data,
148       args->hostname,
149       args->argc,
150       args->argv,
151       args->properties);
152
153   return process;
154 }
155
156 /**
157  * \brief Internal function to create a process.
158  *
159  * This function actually creates the process.
160  * It may be called when a REQ_PROCESS_CREATE request occurs,
161  * or directly for SIMIX internal purposes.
162  *
163  * \return the process created
164  */
165 void SIMIX_process_create(smx_process_t *process,
166                           const char *name,
167                           xbt_main_func_t code,
168                           void *data,
169                           const char *hostname,
170                           int argc, char **argv,
171                           xbt_dict_t properties) {
172
173   *process = NULL;
174   smx_host_t host = SIMIX_host_get_by_name(hostname);
175
176   XBT_DEBUG("Start process %s on host %s", name, hostname);
177
178   if (!SIMIX_host_get_state(host)) {
179     XBT_WARN("Cannot launch process '%s' on failed host '%s'", name,
180           hostname);
181   }
182   else {
183     *process = xbt_new0(s_smx_process_t, 1);
184
185     xbt_assert(((code != NULL) && (host != NULL)), "Invalid parameters");
186
187     /* Process data */
188     (*process)->pid = simix_process_maxpid++;
189     (*process)->name = xbt_strdup(name);
190     (*process)->smx_host = host;
191     (*process)->data = data;
192     (*process)->comms = xbt_fifo_new();
193     (*process)->request.issuer = *process;
194
195     XBT_VERB("Create context %s", (*process)->name);
196     (*process)->context = SIMIX_context_new(code, argc, argv,
197         simix_global->cleanup_process_function, *process);
198
199     (*process)->running_ctx = xbt_new(xbt_running_ctx_t, 1);
200     XBT_RUNNING_CTX_INITIALIZE((*process)->running_ctx);
201
202     /* Add properties */
203     (*process)->properties = properties;
204
205     /* Add the process to it's host process list */
206     xbt_swag_insert(*process, host->process_list);
207
208     XBT_DEBUG("Start context '%s'", (*process)->name);
209
210     /* Now insert it in the global process list and in the process to run list */
211     xbt_swag_insert(*process, simix_global->process_list);
212     XBT_DEBUG("Inserting %s(%s) in the to_run list", (*process)->name, host->name);
213     xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, *process);
214   }
215 }
216
217 /**
218  * \brief Executes the processes from simix_global->process_to_run.
219  *
220  * The processes of simix_global->process_to_run are run (in parallel if
221  * possible).  On exit, simix_global->process_to_run is empty, and
222  * simix_global->process_that_ran contains the list of processes that just ran.
223  * The two lists are swapped so, be careful when using them before and after a
224  * call to this function.
225  */
226 void SIMIX_process_runall(void)
227 {
228   SIMIX_context_runall();
229
230   xbt_dynar_t tmp = simix_global->process_that_ran;
231   simix_global->process_that_ran = simix_global->process_to_run;
232   simix_global->process_to_run = tmp;
233   xbt_dynar_reset(simix_global->process_to_run);
234 }
235
236 /**
237  * \brief Internal function to kill a SIMIX process.
238  *
239  * This function may be called when a REQ_PROCESS_KILL request occurs,
240  * or directly for SIMIX internal purposes.
241  *
242  * \param process poor victim
243  */
244 void SIMIX_process_kill(smx_process_t process) {
245
246   XBT_DEBUG("Killing process %s on %s", process->name, process->smx_host->name);
247
248   process->context->iwannadie = 1;
249   process->blocked = 0;
250   process->suspended = 0;
251   /* FIXME: set doexception to 0 also? */
252
253   /* destroy the blocking action if any */
254   if (process->waiting_action) {
255
256     switch (process->waiting_action->type) {
257
258       case SIMIX_ACTION_EXECUTE:          
259       case SIMIX_ACTION_PARALLEL_EXECUTE:
260         SIMIX_host_execution_destroy(process->waiting_action);
261         break;
262
263       case SIMIX_ACTION_COMMUNICATE:
264         SIMIX_comm_destroy(process->waiting_action);
265         break;
266
267       case SIMIX_ACTION_SLEEP:
268         SIMIX_process_sleep_destroy(process->waiting_action);
269         break;
270
271       case SIMIX_ACTION_SYNCHRO:
272         SIMIX_synchro_stop_waiting(process, &process->request);
273         SIMIX_synchro_destroy(process->waiting_action);
274         break;
275
276       case SIMIX_ACTION_IO:
277         THROW_UNIMPLEMENTED;
278         break;
279     }
280   }
281
282   xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
283 }
284
285 /**
286  * \brief Kills all running processes.
287  * \param issuer this one will not be killed
288  */
289 void SIMIX_process_killall(smx_process_t issuer)
290 {
291   smx_process_t p = NULL;
292
293   while ((p = xbt_swag_extract(simix_global->process_list))) {
294     if (p != issuer) {
295       SIMIX_process_kill(p);
296     }
297   }
298
299   SIMIX_context_runall(simix_global->process_to_run);
300
301   SIMIX_process_empty_trash();
302 }
303
304 void SIMIX_process_change_host(smx_process_t process,
305                                smx_host_t dest)
306 {
307   xbt_assert((process != NULL), "Invalid parameters");
308   xbt_swag_remove(process, process->smx_host->process_list);
309   process->smx_host = dest;
310   xbt_swag_insert(process, dest->process_list);
311 }
312
313 void SIMIX_pre_process_change_host(smx_process_t process, smx_host_t dest)
314 {
315   process->new_host = dest;
316 }
317
318 void SIMIX_pre_process_suspend(smx_req_t req)
319 {
320   smx_process_t process = req->process_suspend.process;
321   SIMIX_process_suspend(process, req->issuer);
322
323   if (process != req->issuer) {
324     SIMIX_request_answer(req);
325   }
326   /* If we are suspending ourselves, then just do not replay the request. */
327 }
328
329 void SIMIX_process_suspend(smx_process_t process, smx_process_t issuer)
330 {
331   xbt_assert((process != NULL), "Invalid parameters");
332
333   if (process->suspended) {
334     XBT_DEBUG("Process '%s' is already suspended", process->name);
335     return;
336   }
337
338   process->suspended = 1;
339
340   /* If we are suspending another process, and it is waiting on an action,
341      suspend its action. */
342   if (process != issuer) {
343
344     if (process->waiting_action) {
345
346       switch (process->waiting_action->type) {
347
348         case SIMIX_ACTION_EXECUTE:
349         case SIMIX_ACTION_PARALLEL_EXECUTE:
350           SIMIX_host_execution_suspend(process->waiting_action);
351           break;
352
353         case SIMIX_ACTION_COMMUNICATE:
354           SIMIX_comm_suspend(process->waiting_action);
355           break;
356
357         case SIMIX_ACTION_SLEEP:
358           SIMIX_process_sleep_suspend(process->waiting_action);
359           break;
360
361         default:
362           xbt_die("Internal error in SIMIX_process_suspend: unexpected action type %d",
363               process->waiting_action->type);
364       }
365     }
366   }
367 }
368
369 void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
370 {
371   xbt_assert((process != NULL), "Invalid parameters");
372
373   if (!process->suspended) {
374     XBT_DEBUG("Process '%s' is not suspended", process->name);
375     return;
376   }
377
378   process->suspended = 0;
379
380   /* If we are resuming another process, resume the action it was waiting for
381      if any. Otherwise add it to the list of process to run in the next round. */
382   if (process != issuer) {
383
384     if (process->waiting_action) {
385
386       switch (process->waiting_action->type) {
387
388         case SIMIX_ACTION_EXECUTE:          
389         case SIMIX_ACTION_PARALLEL_EXECUTE:
390           SIMIX_host_execution_resume(process->waiting_action);
391           break;
392
393         case SIMIX_ACTION_COMMUNICATE:
394           SIMIX_comm_resume(process->waiting_action);
395           break;
396
397         case SIMIX_ACTION_SLEEP:
398           SIMIX_process_sleep_resume(process->waiting_action);
399           break;
400
401         default:
402           xbt_die("Internal error in SIMIX_process_resume: unexpected action type %d",
403               process->waiting_action->type);
404       }
405     }
406     else {
407       xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
408     }
409   }
410 }
411
412 int SIMIX_process_get_maxpid(void) {
413   return simix_process_maxpid;
414 }
415
416 int SIMIX_process_count(void)
417 {
418   return xbt_swag_size(simix_global->process_list);
419 }
420
421 void* SIMIX_process_self_get_data(smx_process_t self)
422 {
423   xbt_assert(self == SIMIX_process_self(), "This is not the current process");
424
425   if (!self) {
426     return NULL;
427   }
428   return SIMIX_process_get_data(self);
429 }
430
431 void SIMIX_process_self_set_data(smx_process_t self, void *data)
432 {
433   xbt_assert(self == SIMIX_process_self(), "This is not the current process");
434
435   SIMIX_process_set_data(self, data);
436 }
437
438 void* SIMIX_process_get_data(smx_process_t process)
439 {
440   return process->data;
441 }
442
443 void SIMIX_process_set_data(smx_process_t process, void *data)
444 {
445   process->data = data;
446 }
447
448 smx_host_t SIMIX_process_get_host(smx_process_t process)
449 {
450   return process->smx_host;
451 }
452
453 /* needs to be public and without request because it is called
454    by exceptions and logging events */
455 const char* SIMIX_process_self_get_name(void) {
456
457   smx_process_t process = SIMIX_process_self();
458   if (process == NULL || process == simix_global->maestro_process)
459     return "";
460
461   return SIMIX_process_get_name(process);
462 }
463
464 const char* SIMIX_process_get_name(smx_process_t process)
465 {
466   return process->name;
467 }
468
469 smx_process_t SIMIX_process_get_by_name(const char* name)
470 {
471   smx_process_t proc;
472
473   xbt_swag_foreach(proc, simix_global->process_list)
474   {
475     if(!strcmp(name, proc->name))
476       return proc;
477   }
478   return NULL;
479 }
480
481 int SIMIX_process_is_suspended(smx_process_t process)
482 {
483   return process->suspended;
484 }
485
486 xbt_dict_t SIMIX_process_get_properties(smx_process_t process)
487 {
488   return process->properties;
489 }
490
491 void SIMIX_pre_process_sleep(smx_req_t req)
492 {
493   if (MC_IS_ENABLED) {
494     MC_process_clock_add(req->issuer, req->process_sleep.duration);
495     req->process_sleep.result = SIMIX_DONE;
496     SIMIX_request_answer(req);
497     return;
498   }
499   smx_action_t action = SIMIX_process_sleep(req->issuer, req->process_sleep.duration);
500   xbt_fifo_push(action->request_list, req);
501   req->issuer->waiting_action = action;
502 }
503
504 smx_action_t SIMIX_process_sleep(smx_process_t process, double duration)
505 {
506   smx_action_t action;
507   smx_host_t host = process->smx_host;
508
509   /* check if the host is active */
510   if (surf_workstation_model->extension.
511       workstation.get_state(host->host) != SURF_RESOURCE_ON) {
512     THROWF(host_error, 0, "Host %s failed, you cannot call this function",
513            host->name);
514   }
515
516   action = xbt_mallocator_get(simix_global->action_mallocator);
517   action->type = SIMIX_ACTION_SLEEP;
518   action->name = NULL;
519 #ifdef HAVE_TRACING
520   action->category = NULL;
521 #endif
522
523   action->sleep.host = host;
524   action->sleep.surf_sleep =
525       surf_workstation_model->extension.workstation.sleep(host->host, duration);
526
527   surf_workstation_model->action_data_set(action->sleep.surf_sleep, action);
528   XBT_DEBUG("Create sleep action %p", action);
529
530   return action;
531 }
532
533 void SIMIX_post_process_sleep(smx_action_t action)
534 {
535   smx_req_t req;
536   e_smx_state_t state;
537
538   while ((req = xbt_fifo_shift(action->request_list))) {
539
540     switch(surf_workstation_model->action_state_get(action->sleep.surf_sleep)){
541       case SURF_ACTION_FAILED:
542         state = SIMIX_SRC_HOST_FAILURE;
543         break;
544
545       case SURF_ACTION_DONE:
546         state = SIMIX_DONE;
547         break;
548
549       default:
550         THROW_IMPOSSIBLE;
551         break;
552     }
553     req->process_sleep.result = state;
554     req->issuer->waiting_action = NULL;
555     SIMIX_request_answer(req);
556   }
557   SIMIX_process_sleep_destroy(action);
558 }
559
560 void SIMIX_process_sleep_destroy(smx_action_t action)
561 {
562   XBT_DEBUG("Destroy action %p", action);
563   if (action->sleep.surf_sleep)
564     action->sleep.surf_sleep->model_type->action_unref(action->sleep.surf_sleep);
565   xbt_mallocator_release(simix_global->action_mallocator, action);
566 }
567
568 void SIMIX_process_sleep_suspend(smx_action_t action)
569 {
570   surf_workstation_model->suspend(action->sleep.surf_sleep);
571 }
572
573 void SIMIX_process_sleep_resume(smx_action_t action)
574 {
575   surf_workstation_model->resume(action->sleep.surf_sleep);
576 }
577
578 /** 
579  * \brief Calling this function makes the process to yield.
580  *
581  * Only the current process can call this function, giving back the control to
582  * maestro.
583  *
584  * \param self the current process
585  */
586 void SIMIX_process_yield(smx_process_t self)
587 {
588   XBT_DEBUG("Yield process '%s'", self->name);
589
590   /* Go into sleep and return control to maestro */
591   SIMIX_context_suspend(self->context);
592
593   /* Ok, maestro returned control to us */
594   XBT_DEBUG("Control returned to me: '%s'", self->name);
595
596   if (self->context->iwannadie){
597     XBT_DEBUG("I wanna die!");
598     SIMIX_context_stop(self->context);
599   }
600
601   if (self->doexception) {
602     XBT_DEBUG("Wait, maestro left me an exception");
603     self->doexception = 0;
604     RETHROW;
605   }
606   
607   if (self->new_host) {
608     SIMIX_process_change_host(self, self->new_host);
609     self->new_host = NULL;
610   }
611 }
612
613 /* callback: context fetching */
614 xbt_running_ctx_t *SIMIX_process_get_running_context(void)
615 {
616   return SIMIX_process_self()->running_ctx;
617 }
618
619 /* callback: termination */
620 void SIMIX_process_exception_terminate(xbt_ex_t * e)
621 {
622   xbt_ex_display(e);
623   abort();
624 }
625
626 smx_context_t SIMIX_process_get_context(smx_process_t p) {
627   return p->context;
628 }
629
630 void SIMIX_process_set_context(smx_process_t p,smx_context_t c) {
631   p->context = c;
632 }
633
634 /**
635  * \brief Returns the list of processes to run.
636  */
637 XBT_INLINE xbt_dynar_t SIMIX_process_get_runnable(void)
638 {
639   return simix_global->process_to_run;
640 }