Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Simix: clean unfinished comms when terminating a process
[simgrid.git] / src / simix / smx_process.c
1 /* Copyright (c) 2007, 2008, 2009, 2010. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "private.h"
8 #include "xbt/sysdep.h"
9 #include "xbt/log.h"
10 #include "xbt/dict.h"
11 #include "msg/mailbox.h"
12 #include "mc/mc.h"
13
/* Logging category of this file, declared as a sub-category of "simix". */
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_process, simix,
                                "Logging specific to SIMIX (process)");

/* Next pid to hand out. It is post-incremented each time a process (maestro
 * included) is created in this file, and its current value is what
 * SIMIX_process_get_maxpid() reports. */
unsigned long simix_process_maxpid = 0;
18
19 /**
20  * \brief Returns the current agent.
21  *
22  * This functions returns the currently running SIMIX process.
23  *
24  * \return The SIMIX process
25  */
26 XBT_INLINE smx_process_t SIMIX_process_self(void)
27 {
28   smx_context_t self_context = SIMIX_context_self();
29
30   return self_context ? SIMIX_context_get_data(self_context) : NULL;
31 }
32
33 /**
34  * \brief Move a process to the list of processes to destroy.
35  */
36 void SIMIX_process_cleanup(smx_process_t process)
37 {
38   XBT_DEBUG("Cleanup process %s (%p), waiting action %p",
39       process->name, process, process->waiting_action);
40
41   /* cancel non-blocking communications */
42   smx_action_t action;
43   while ((action = xbt_fifo_pop(process->comms))) {
44
45     /* make sure no one will finish the comm after this process is destroyed */
46     SIMIX_comm_cancel(action);
47
48     if (action->comm.src_proc == process) {
49       XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d",
50           action, action->comm.detached, action->state);
51       action->comm.src_proc = NULL;
52
53       if (action->comm.detached) {
54         /* the receiver was supposed to destroy the comm after completion,
55          * but the comm will actually never finish */
56         action->comm.refcount++;
57       }
58     }
59     else if (action->comm.dst_proc == process){
60       XBT_DEBUG("Found an unfinished recv comm %p, state %d", action, action->state);
61       action->comm.dst_proc = NULL;
62     }
63     else {
64       THROW_IMPOSSIBLE;
65     }
66
67     SIMIX_comm_destroy(action);
68   }
69
70   /*xbt_swag_remove(process, simix_global->process_to_run);*/
71   xbt_swag_remove(process, simix_global->process_list);
72   xbt_swag_remove(process, process->smx_host->process_list);
73   xbt_swag_insert(process, simix_global->process_to_destroy);
74 }
75
76 /** 
77  * Garbage collection
78  *
79  * Should be called some time to time to free the memory allocated for processes
80  * that have finished (or killed).
81  */
82 void SIMIX_process_empty_trash(void)
83 {
84   smx_process_t process = NULL;
85
86   while ((process = xbt_swag_extract(simix_global->process_to_destroy))) {
87     SIMIX_context_free(process->context);
88
89     /* Free the exception allocated at creation time */
90     if (process->running_ctx)
91       free(process->running_ctx);
92     if (process->properties)
93       xbt_dict_free(&process->properties);
94
95     xbt_fifo_free(process->comms);
96
97     free(process->name);
98     process->name = NULL;
99     free(process);
100   }
101 }
102
103 /**
104  * \brief Creates and runs the maestro process
105  */
106 void SIMIX_create_maestro_process()
107 {
108   smx_process_t maestro = NULL;
109
110   /* Create maestro process and intilialize it */
111   maestro = xbt_new0(s_smx_process_t, 1);
112   maestro->pid = simix_process_maxpid++;
113   maestro->name = (char *) "";
114   maestro->running_ctx = xbt_new(xbt_running_ctx_t, 1);
115   XBT_RUNNING_CTX_INITIALIZE(maestro->running_ctx);
116   maestro->context = SIMIX_context_new(NULL, 0, NULL, NULL, maestro);
117   maestro->request.issuer = maestro;
118
119   simix_global->maestro_process = maestro;
120   return;
121 }
122
123 /**
124  * \brief Same as SIMIX_process_create() but with only one argument (used by timers).
125  * \return the process created
126  */
127 smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) {
128
129   smx_process_t process;
130   simix_global->create_process_function(
131       &process,
132       args->name,
133       args->code,
134       args->data,
135       args->hostname,
136       args->argc,
137       args->argv,
138       args->properties);
139
140   return process;
141 }
142
143 /**
144  * \brief Internal function to create a process.
145  *
146  * This function actually creates the process.
147  * It may be called when a REQ_PROCESS_CREATE request occurs,
148  * or directly for SIMIX internal purposes.
149  *
150  * \return the process created
151  */
152 void SIMIX_process_create(smx_process_t *process,
153                           const char *name,
154                           xbt_main_func_t code,
155                           void *data,
156                           const char *hostname,
157                           int argc, char **argv,
158                           xbt_dict_t properties) {
159
160   *process = NULL;
161   smx_host_t host = SIMIX_host_get_by_name(hostname);
162
163   XBT_DEBUG("Start process %s on host %s", name, hostname);
164
165   if (!SIMIX_host_get_state(host)) {
166     XBT_WARN("Cannot launch process '%s' on failed host '%s'", name,
167           hostname);
168   }
169   else {
170     *process = xbt_new0(s_smx_process_t, 1);
171
172     xbt_assert(((code != NULL) && (host != NULL)), "Invalid parameters");
173
174     /* Process data */
175     (*process)->pid = simix_process_maxpid++;
176     (*process)->name = xbt_strdup(name);
177     (*process)->smx_host = host;
178     (*process)->data = data;
179     (*process)->comms = xbt_fifo_new();
180
181     XBT_VERB("Create context %s", (*process)->name);
182     (*process)->context = SIMIX_context_new(code, argc, argv,
183         simix_global->cleanup_process_function, *process);
184
185     (*process)->running_ctx = xbt_new(xbt_running_ctx_t, 1);
186     XBT_RUNNING_CTX_INITIALIZE((*process)->running_ctx);
187
188     /* Add properties */
189     (*process)->properties = properties;
190
191     /* Add the process to it's host process list */
192     xbt_swag_insert(*process, host->process_list);
193
194     XBT_DEBUG("Start context '%s'", (*process)->name);
195
196     /* Now insert it in the global process list and in the process to run list */
197     xbt_swag_insert(*process, simix_global->process_list);
198     XBT_DEBUG("Inserting %s(%s) in the to_run list", (*process)->name, host->name);
199     xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, *process);
200   }
201 }
202
203 /**
204  * \brief Executes the processes from simix_global->process_to_run.
205  *
206  * The processes of simix_global->process_to_run are run (in parallel if
207  * possible).  On exit, simix_global->process_to_run is empty, and
208  * simix_global->process_that_ran contains the list of processes that just ran.
209  * The two lists are swapped so, be careful when using them before and after a
210  * call to this function.
211  */
212 void SIMIX_process_runall(void)
213 {
214   SIMIX_context_runall(simix_global->process_to_run);
215   xbt_dynar_t tmp = simix_global->process_that_ran;
216   simix_global->process_that_ran = simix_global->process_to_run;
217   simix_global->process_to_run = tmp;
218   xbt_dynar_reset(simix_global->process_to_run);
219 }
220
221 /**
222  * \brief Internal function to kill a SIMIX process.
223  *
224  * This function may be called when a REQ_PROCESS_KILL request occurs,
225  * or directly for SIMIX internal purposes.
226  *
227  * \param process poor victim
228  */
229 void SIMIX_process_kill(smx_process_t process) {
230
231   XBT_DEBUG("Killing process %s on %s", process->name, process->smx_host->name);
232
233   process->context->iwannadie = 1;
234   process->blocked = 0;
235   process->suspended = 0;
236   /* FIXME: set doexception to 0 also? */
237
238   /* destroy the blocking action if any */
239   if (process->waiting_action) {
240
241     switch (process->waiting_action->type) {
242
243       case SIMIX_ACTION_EXECUTE:          
244       case SIMIX_ACTION_PARALLEL_EXECUTE:
245         SIMIX_host_execution_destroy(process->waiting_action);
246         break;
247
248       case SIMIX_ACTION_COMMUNICATE:
249         SIMIX_comm_destroy(process->waiting_action);
250         break;
251
252       case SIMIX_ACTION_SLEEP:
253         SIMIX_process_sleep_destroy(process->waiting_action);
254         break;
255
256       case SIMIX_ACTION_SYNCHRO:
257         SIMIX_synchro_stop_waiting(process, &process->request);
258         SIMIX_synchro_destroy(process->waiting_action);
259         break;
260
261       case SIMIX_ACTION_IO:
262         THROW_UNIMPLEMENTED;
263         break;
264     }
265   }
266
267   xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
268 }
269
270 /**
271  * \brief Kills all running processes.
272  * \param issuer this one will not be killed
273  */
274 void SIMIX_process_killall(smx_process_t issuer)
275 {
276   smx_process_t p = NULL;
277
278   while ((p = xbt_swag_extract(simix_global->process_list))) {
279     if (p != issuer) {
280       SIMIX_process_kill(p);
281     }
282   }
283
284   SIMIX_context_runall(simix_global->process_to_run);
285
286   SIMIX_process_empty_trash();
287 }
288
289 void SIMIX_process_change_host(smx_process_t process,
290                                smx_host_t dest)
291 {
292   xbt_assert((process != NULL), "Invalid parameters");
293   xbt_swag_remove(process, process->smx_host->process_list);
294   process->smx_host = dest;
295   xbt_swag_insert(process, dest->process_list);
296 }
297
298 void SIMIX_pre_process_change_host(smx_process_t process, smx_host_t dest)
299 {
300   process->new_host = dest;
301 }
302
303 void SIMIX_pre_process_suspend(smx_req_t req)
304 {
305   smx_process_t process = req->process_suspend.process;
306   SIMIX_process_suspend(process, req->issuer);
307
308   if (process != req->issuer) {
309     SIMIX_request_answer(req);
310   }
311   /* If we are suspending ourselves, then just do not replay the request. */
312 }
313
314 void SIMIX_process_suspend(smx_process_t process, smx_process_t issuer)
315 {
316   process->suspended = 1;
317
318   /* If we are suspending another process, and it is waiting on an action,
319      suspend it's action. */
320   if (process != issuer) {
321
322     if (process->waiting_action) {
323
324       switch (process->waiting_action->type) {
325
326         case SIMIX_ACTION_EXECUTE:
327         case SIMIX_ACTION_PARALLEL_EXECUTE:
328           SIMIX_host_execution_suspend(process->waiting_action);
329           break;
330
331         case SIMIX_ACTION_COMMUNICATE:
332           SIMIX_comm_suspend(process->waiting_action);
333           break;
334
335         case SIMIX_ACTION_SLEEP:
336           SIMIX_process_sleep_suspend(process->waiting_action);
337           break;
338
339         default:
340           THROW_IMPOSSIBLE;
341       }
342     }
343   }
344 }
345
346 void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
347 {
348   xbt_assert((process != NULL), "Invalid parameters");
349
350   process->suspended = 0;
351
352   /* If we are resuming another process, resume the action it was waiting for
353      if any. Otherwise add it to the list of process to run in the next round. */
354   if (process != issuer) {
355
356     if (process->waiting_action) {
357
358       switch (process->waiting_action->type) {
359
360         case SIMIX_ACTION_EXECUTE:          
361         case SIMIX_ACTION_PARALLEL_EXECUTE:
362           SIMIX_host_execution_resume(process->waiting_action);
363           break;
364
365         case SIMIX_ACTION_COMMUNICATE:
366           SIMIX_comm_resume(process->waiting_action);
367           break;
368
369         case SIMIX_ACTION_SLEEP:
370           SIMIX_process_sleep_resume(process->waiting_action);
371           break;
372
373         default:
374           THROW_IMPOSSIBLE;
375       }
376     }
377     else {
378       xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, process);
379     }
380   }
381 }
382
383 int SIMIX_process_get_maxpid(void) {
384   return simix_process_maxpid;
385 }
386
387 int SIMIX_process_count(void)
388 {
389   return xbt_swag_size(simix_global->process_list);
390 }
391
392 void* SIMIX_process_self_get_data(void)
393 {
394   smx_process_t me = SIMIX_process_self();
395   if (!me) {
396     return NULL;
397   }
398   return SIMIX_process_get_data(me);
399 }
400
/**
 * \brief Sets the user data of the current process.
 *
 * The process given by SIMIX_process_self() is passed unchecked to
 * SIMIX_process_set_data().
 *
 * \param data the user data to store
 */
void SIMIX_process_self_set_data(void *data)
{
  SIMIX_process_set_data(
      SIMIX_process_self(),
      data);
}
405
406 void* SIMIX_process_get_data(smx_process_t process)
407 {
408   return process->data;
409 }
410
411 void SIMIX_process_set_data(smx_process_t process, void *data)
412 {
413   process->data = data;
414 }
415
416 smx_host_t SIMIX_process_get_host(smx_process_t process)
417 {
418   return process->smx_host;
419 }
420
421 /* needs to be public and without request because it is called
422    by exceptions and logging events */
423 const char* SIMIX_process_self_get_name(void) {
424
425   smx_process_t process = SIMIX_process_self();
426   if (process == NULL || process == simix_global->maestro_process)
427     return "";
428
429   return SIMIX_process_get_name(process);
430 }
431
432 const char* SIMIX_process_get_name(smx_process_t process)
433 {
434   return process->name;
435 }
436
437 smx_process_t SIMIX_process_get_by_name(const char* name)
438 {
439   smx_process_t proc;
440
441   xbt_swag_foreach(proc, simix_global->process_list)
442   {
443     if(!strcmp(name, proc->name))
444       return proc;
445   }
446   return NULL;
447 }
448
449 int SIMIX_process_is_suspended(smx_process_t process)
450 {
451   return process->suspended;
452 }
453
454 xbt_dict_t SIMIX_process_get_properties(smx_process_t process)
455 {
456   return process->properties;
457 }
458
459 void SIMIX_pre_process_sleep(smx_req_t req)
460 {
461   if (MC_IS_ENABLED) {
462     MC_process_clock_add(req->issuer, req->process_sleep.duration);
463     req->process_sleep.result = SIMIX_DONE;
464     SIMIX_request_answer(req);
465     return;
466   }
467   smx_action_t action = SIMIX_process_sleep(req->issuer, req->process_sleep.duration);
468   xbt_fifo_push(action->request_list, req);
469   req->issuer->waiting_action = action;
470 }
471
472 smx_action_t SIMIX_process_sleep(smx_process_t process, double duration)
473 {
474   smx_action_t action;
475   smx_host_t host = process->smx_host;
476
477   /* check if the host is active */
478   if (surf_workstation_model->extension.
479       workstation.get_state(host->host) != SURF_RESOURCE_ON) {
480     THROWF(host_error, 0, "Host %s failed, you cannot call this function",
481            host->name);
482   }
483
484   action = xbt_mallocator_get(simix_global->action_mallocator);
485   action->type = SIMIX_ACTION_SLEEP;
486   action->name = NULL;
487 #ifdef HAVE_TRACING
488   action->category = NULL;
489 #endif
490
491   action->sleep.host = host;
492   action->sleep.surf_sleep =
493       surf_workstation_model->extension.workstation.sleep(host->host, duration);
494
495   surf_workstation_model->action_data_set(action->sleep.surf_sleep, action);
496   XBT_DEBUG("Create sleep action %p", action);
497
498   return action;
499 }
500
501 void SIMIX_post_process_sleep(smx_action_t action)
502 {
503   smx_req_t req;
504   e_smx_state_t state;
505
506   while ((req = xbt_fifo_shift(action->request_list))) {
507
508     switch(surf_workstation_model->action_state_get(action->sleep.surf_sleep)){
509       case SURF_ACTION_FAILED:
510         state = SIMIX_SRC_HOST_FAILURE;
511         break;
512
513       case SURF_ACTION_DONE:
514         state = SIMIX_DONE;
515         break;
516
517       default:
518         THROW_IMPOSSIBLE;
519         break;
520     }
521     req->process_sleep.result = state;
522     req->issuer->waiting_action = NULL;
523     SIMIX_request_answer(req);
524   }
525   SIMIX_process_sleep_destroy(action);
526 }
527
528 void SIMIX_process_sleep_destroy(smx_action_t action)
529 {
530   XBT_DEBUG("Destroy action %p", action);
531   if (action->sleep.surf_sleep)
532     action->sleep.surf_sleep->model_type->action_unref(action->sleep.surf_sleep);
533   xbt_mallocator_release(simix_global->action_mallocator, action);
534 }
535
536 void SIMIX_process_sleep_suspend(smx_action_t action)
537 {
538   surf_workstation_model->suspend(action->sleep.surf_sleep);
539 }
540
541 void SIMIX_process_sleep_resume(smx_action_t action)
542 {
543   surf_workstation_model->resume(action->sleep.surf_sleep);
544 }
545
546 /** 
547  * Calling this function makes the process to yield.
548  * Only the processes can call this function, giving back the control to maestro
549  */
550 void SIMIX_process_yield(void)
551 {
552   smx_process_t self = SIMIX_process_self();
553
554   XBT_DEBUG("Yield process '%s'", self->name);
555
556   /* Go into sleep and return control to maestro */
557   SIMIX_context_suspend(self->context);
558
559   /* Ok, maestro returned control to us */
560   XBT_DEBUG("Maestro returned control to me: '%s'", self->name);
561
562   if (self->context->iwannadie){
563     XBT_DEBUG("I wanna die!");
564     SIMIX_context_stop(self->context);
565   }
566
567   if (self->doexception) {
568     XBT_DEBUG("Wait, maestro left me an exception");
569     self->doexception = 0;
570     RETHROW;
571   }
572   
573   if (self->new_host) {
574     SIMIX_process_change_host(self, self->new_host);
575     self->new_host = NULL;
576   }
577 }
578
579 /* callback: context fetching */
580 xbt_running_ctx_t *SIMIX_process_get_running_context(void)
581 {
582   return SIMIX_process_self()->running_ctx;
583 }
584
585 /* callback: termination */
586 void SIMIX_process_exception_terminate(xbt_ex_t * e)
587 {
588   xbt_ex_display(e);
589   abort();
590 }
591
592 smx_context_t SIMIX_process_get_context(smx_process_t p) {
593   return p->context;
594 }
595
596 void SIMIX_process_set_context(smx_process_t p,smx_context_t c) {
597   p->context = c;
598 }