Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
cc4273b864c35d690ce856a96a0f30673f3535c3
[simgrid.git] / src / simix / smx_host.c
1 /* Copyright (c) 2007-2012. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "smx_private.h"
8 #include "xbt/sysdep.h"
9 #include "xbt/log.h"
10 #include "xbt/dict.h"
11 #include "mc/mc.h"
12
13 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_host, simix,
14                                 "Logging specific to SIMIX (hosts)");
15
16 xbt_dict_t watched_hosts_lib;
17
18 static void SIMIX_execution_finish(smx_action_t action);
19
20 /**
21  * \brief Internal function to create a SIMIX host.
22  * \param name name of the host to create
23  * \param workstation the SURF workstation to encapsulate
24  * \param data some user data (may be NULL)
25  */
26 smx_host_t SIMIX_host_create(const char *name,
27                                void *workstation, void *data)
28 {
29   smx_host_t smx_host = xbt_new0(s_smx_host_t, 1);
30   s_smx_process_t proc;
31
32   /* Host structure */
33   smx_host->name = xbt_strdup(name);
34   smx_host->data = data;
35   smx_host->host = workstation;
36   smx_host->process_list =
37       xbt_swag_new(xbt_swag_offset(proc, host_proc_hookup));
38
39   /* Update global variables */
40   xbt_lib_set(host_lib,smx_host->name,SIMIX_HOST_LEVEL,smx_host);
41
42   return smx_host;
43 }
44
45 /**
46  * \brief Internal function to destroy a SIMIX host.
47  *
48  * \param h the host to destroy (a smx_host_t)
49  */
50 void SIMIX_host_destroy(void *h)
51 {
52   smx_host_t host = (smx_host_t) h;
53
54   xbt_assert((host != NULL), "Invalid parameters");
55
56   /* Clean Simulator data */
57   if (xbt_swag_size(host->process_list) != 0) {
58     char *msg =
59         bprintf("Shutting down host %s, but it's not empty:", host->name);
60     char *tmp;
61     smx_process_t process = NULL;
62
63     xbt_swag_foreach(process, host->process_list) {
64       tmp = bprintf("%s\n\t%s", msg, process->name);
65       free(msg);
66       msg = tmp;
67     }
68     SIMIX_display_process_status();
69     THROWF(arg_error, 0, "%s", msg);
70   }
71   xbt_dynar_free(&host->auto_restart_processes);
72   xbt_swag_free(host->process_list);
73
74   /* Clean host structure */
75   free(host->name);
76   free(host);
77
78   return;
79 }
80
81 /**
82  * \brief Returns a dict of all hosts.
83  *
84  * \return List of all hosts (as a #xbt_dict_t)
85  */
86 xbt_dict_t SIMIX_host_get_dict(void)
87 {
88   xbt_dict_t host_dict = xbt_dict_new_homogeneous(NULL);
89   xbt_lib_cursor_t cursor = NULL;
90   char *name = NULL;
91   void **host = NULL;
92
93   xbt_lib_foreach(host_lib, cursor, name, host){
94     if(host[SIMIX_HOST_LEVEL])
95             xbt_dict_set(host_dict,name,host[SIMIX_HOST_LEVEL], NULL);
96   }
97   return host_dict;
98 }
99
100 smx_host_t SIMIX_host_get_by_name(const char *name)
101 {
102   xbt_assert(((simix_global != NULL)
103                && (host_lib != NULL)),
104               "Environment not set yet");
105
106   return xbt_lib_get_or_null(host_lib, name, SIMIX_HOST_LEVEL);
107 }
108
109 smx_host_t SIMIX_host_self(void)
110 {
111   smx_process_t process = SIMIX_process_self();
112   return (process == NULL) ? NULL : SIMIX_process_get_host(process);
113 }
114
115 /* needs to be public and without simcall because it is called
116    by exceptions and logging events */
117 const char* SIMIX_host_self_get_name(void)
118 {
119   smx_host_t host = SIMIX_host_self();
120   if (host == NULL || SIMIX_process_self() == simix_global->maestro_process)
121     return "";
122
123   return SIMIX_host_get_name(host);
124 }
125
126 const char* SIMIX_host_get_name(smx_host_t host)
127 {
128   xbt_assert((host != NULL), "Invalid parameters");
129
130   return host->name;
131 }
132
133 xbt_dict_t SIMIX_host_get_properties(smx_host_t host)
134 {
135   xbt_assert((host != NULL), "Invalid parameters (simix host is NULL)");
136
137   return surf_workstation_model->extension.workstation.get_properties(host->host);
138 }
139
140 double SIMIX_host_get_speed(smx_host_t host)
141 {
142   xbt_assert((host != NULL), "Invalid parameters (simix host is NULL)");
143
144   return surf_workstation_model->extension.workstation.
145       get_speed(host->host, 1.0);
146 }
147
148 double SIMIX_host_get_available_speed(smx_host_t host)
149 {
150   xbt_assert((host != NULL), "Invalid parameters (simix host is NULL)");
151
152   return surf_workstation_model->extension.workstation.
153       get_available_speed(host->host);
154 }
155
156 int SIMIX_host_get_state(smx_host_t host)
157 {
158   xbt_assert((host != NULL), "Invalid parameters (simix host is NULL)");
159
160   return surf_workstation_model->extension.workstation.
161       get_state(host->host);
162 }
163
164 void* SIMIX_host_self_get_data(void)
165 {
166   return SIMIX_host_get_data(SIMIX_host_self());
167 }
168
169 void SIMIX_host_self_set_data(void *data)
170 {
171   SIMIX_host_set_data(SIMIX_host_self(), data);
172 }
173
174 void* SIMIX_host_get_data(smx_host_t host)
175 {
176   xbt_assert((host != NULL), "Invalid parameters (simix host is NULL)");
177
178   return host->data;
179 }
180 void _SIMIX_host_free_process_arg(void *);
181 void _SIMIX_host_free_process_arg(void *data)
182 {
183   smx_process_arg_t arg = *(void**)data;
184   int i;
185   xbt_free(arg->name);
186   for (i = 0; i < arg->argc; i++) {
187     xbt_free(arg->argv[i]);
188   }
189   xbt_free(arg->argv);
190   xbt_free(arg);
191 }
192 /**
193  * \brief Add a process to the list of the processes that the host will restart when it comes back
194  * This function add a process to the list of the processes that will be restarted when the host comes
195  * back. It is expected that this function is called when the host is down.
196  * The processes will only be restarted once, meaning that you will have to register the process
197  * again to restart the process again.
198  */
199 void SIMIX_host_add_auto_restart_process(smx_host_t host,
200                                          const char *name,
201                                          xbt_main_func_t code,
202                                          void *data,
203                                          const char *hostname,
204                                          double kill_time,
205                                          int argc, char **argv,
206                                          xbt_dict_t properties,
207                                          int auto_restart)
208 {
209   if (!host->auto_restart_processes) {
210     host->auto_restart_processes = xbt_dynar_new(sizeof(smx_process_arg_t),_SIMIX_host_free_process_arg);
211   }
212   smx_process_arg_t arg = xbt_new(s_smx_process_arg_t,1);
213
214   arg->name = xbt_strdup(name);
215   arg->code = code;
216   arg->data = data;
217   arg->hostname = hostname;
218   arg->kill_time = kill_time;
219   arg->argc = argc;
220   arg->argv = xbt_new(char*,argc + 1);
221
222   int i;
223   for (i = 0; i < argc; i++) {
224     arg->argv[i] = xbt_strdup(argv[i]);
225   }
226
227   arg->properties = properties;
228   arg->auto_restart = auto_restart;
229
230   if( SIMIX_host_get_state(host) == SURF_RESOURCE_OFF
231       && !xbt_dict_get_or_null(watched_hosts_lib,host->name)){
232     xbt_dict_set(watched_hosts_lib,host->name,host,NULL);
233     XBT_DEBUG("Have push host %s to watched_hosts_lib because state == SURF_RESOURCE_OFF",host->name);
234   }
235   xbt_dynar_push_as(host->auto_restart_processes,smx_process_arg_t,arg);
236 }
237 /**
238  * \brief Restart the list of processes that have been registered to the host
239  */
240 void SIMIX_host_restart_processes(smx_host_t host)
241 {
242   unsigned int cpt;
243   smx_process_arg_t arg;
244   xbt_dynar_foreach(host->auto_restart_processes,cpt,arg) {
245
246     smx_process_t process;
247
248     XBT_DEBUG("Restarting Process %s(%s) right now", arg->argv[0], arg->hostname);
249     if (simix_global->create_process_function) {
250       simix_global->create_process_function(&process,
251                                             arg->argv[0],
252                                             arg->code,
253                                             NULL,
254                                             arg->hostname,
255                                             arg->kill_time,
256                                             arg->argc,
257                                             arg->argv,
258                                             arg->properties,
259                                             arg->auto_restart);
260     }
261     else {
262       simcall_process_create(&process,
263                                             arg->argv[0],
264                                             arg->code,
265                                             NULL,
266                                             arg->hostname,
267                                             arg->kill_time,
268                                             arg->argc,
269                                             arg->argv,
270                                             arg->properties,
271                                             arg->auto_restart);
272
273     }
274   }
275   xbt_dynar_reset(host->auto_restart_processes);
276 }
277
278 void SIMIX_host_set_data(smx_host_t host, void *data)
279 {
280   xbt_assert((host != NULL), "Invalid parameters");
281   xbt_assert((host->data == NULL), "Data already set");
282
283   host->data = data;
284 }
285
286 smx_action_t SIMIX_host_execute(const char *name, smx_host_t host,
287                                 double computation_amount,
288                                 double priority)
289 {
290   /* alloc structures and initialize */
291   smx_action_t action = xbt_mallocator_get(simix_global->action_mallocator);
292   action->type = SIMIX_ACTION_EXECUTE;
293   action->name = xbt_strdup(name);
294   action->state = SIMIX_RUNNING;
295   action->execution.host = host;
296
297 #ifdef HAVE_TRACING
298   action->category = NULL;
299 #endif
300
301   /* set surf's action */
302   if (!MC_IS_ENABLED) {
303     action->execution.surf_exec =
304       surf_workstation_model->extension.workstation.execute(host->host,
305     computation_amount);
306     surf_workstation_model->action_data_set(action->execution.surf_exec, action);
307     surf_workstation_model->set_priority(action->execution.surf_exec, priority);
308   }
309
310   XBT_DEBUG("Create execute action %p", action);
311
312   return action;
313 }
314
315 smx_action_t SIMIX_host_parallel_execute( const char *name,
316     int host_nb, smx_host_t *host_list,
317     double *computation_amount, double *communication_amount,
318     double amount, double rate)
319 {
320   void **workstation_list = NULL;
321   int i;
322
323   /* alloc structures and initialize */
324   smx_action_t action = xbt_mallocator_get(simix_global->action_mallocator);
325   action->type = SIMIX_ACTION_PARALLEL_EXECUTE;
326   action->name = xbt_strdup(name);
327   action->state = SIMIX_RUNNING;
328   action->execution.host = NULL; /* FIXME: do we need the list of hosts? */
329
330 #ifdef HAVE_TRACING
331   action->category = NULL;
332 #endif
333
334   /* set surf's action */
335   workstation_list = xbt_new0(void *, host_nb);
336   for (i = 0; i < host_nb; i++)
337     workstation_list[i] = host_list[i]->host;
338
339   /* set surf's action */
340   if (!MC_IS_ENABLED) {
341     action->execution.surf_exec =
342       surf_workstation_model->extension.workstation.
343       execute_parallel_task(host_nb, workstation_list, computation_amount,
344                       communication_amount, rate);
345
346     surf_workstation_model->action_data_set(action->execution.surf_exec, action);
347   }
348   XBT_DEBUG("Create parallel execute action %p", action);
349
350   return action;
351 }
352
353 void SIMIX_host_execution_destroy(smx_action_t action)
354 {
355   int destroyed=0;
356   XBT_DEBUG("Destroy action %p", action);
357
358
359   if (action->execution.surf_exec) {
360     destroyed = surf_workstation_model->action_unref(action->execution.surf_exec);
361     action->execution.surf_exec = NULL;
362   }
363
364   if (destroyed) {
365     xbt_free(action->name);
366     xbt_mallocator_release(simix_global->action_mallocator, action);
367   }
368 }
369
370 void SIMIX_host_execution_cancel(smx_action_t action)
371 {
372   XBT_DEBUG("Cancel action %p", action);
373
374   if (action->execution.surf_exec)
375     surf_workstation_model->action_cancel(action->execution.surf_exec);
376 }
377
378 double SIMIX_host_execution_get_remains(smx_action_t action)
379 {
380   double result = 0.0;
381
382   if (action->state == SIMIX_RUNNING)
383     result = surf_workstation_model->get_remains(action->execution.surf_exec);
384
385   return result;
386 }
387
388 e_smx_state_t SIMIX_host_execution_get_state(smx_action_t action)
389 {
390   return action->state;
391 }
392
393 void SIMIX_host_execution_set_priority(smx_action_t action, double priority)
394 {
395   if(action->execution.surf_exec)
396     surf_workstation_model->set_priority(action->execution.surf_exec, priority);
397 }
398
399 void SIMIX_pre_host_execution_wait(smx_simcall_t simcall)
400 {
401   smx_action_t action = simcall->host_execution_wait.execution;
402
403   XBT_DEBUG("Wait for execution of action %p, state %d", action, (int)action->state);
404
405   /* Associate this simcall to the action */
406   xbt_fifo_push(action->simcalls, simcall);
407   simcall->issuer->waiting_action = action;
408
409   /* set surf's action */
410   if (MC_IS_ENABLED) {
411     action->state = SIMIX_DONE;
412     SIMIX_execution_finish(action);
413     return;
414   }
415
416   /* If the action is already finished then perform the error handling */
417   if (action->state != SIMIX_RUNNING)
418     SIMIX_execution_finish(action);
419 }
420
421 void SIMIX_host_execution_suspend(smx_action_t action)
422 {
423   if(action->execution.surf_exec)
424     surf_workstation_model->suspend(action->execution.surf_exec);
425 }
426
427 void SIMIX_host_execution_resume(smx_action_t action)
428 {
429   if(action->execution.surf_exec)
430     surf_workstation_model->resume(action->execution.surf_exec);
431 }
432
433 void SIMIX_execution_finish(smx_action_t action)
434 {
435   xbt_fifo_item_t item;
436   smx_simcall_t simcall;
437
438   xbt_fifo_foreach(action->simcalls, item, simcall, smx_simcall_t) {
439
440     switch (action->state) {
441
442       case SIMIX_DONE:
443         /* do nothing, action done */
444   XBT_DEBUG("SIMIX_execution_finished: execution successful");
445         break;
446
447       case SIMIX_FAILED:
448         XBT_DEBUG("SIMIX_execution_finished: host '%s' failed", simcall->issuer->smx_host->name);
449         simcall->issuer->context->iwannadie = 1;
450         //SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
451         break;
452
453       case SIMIX_CANCELED:
454         XBT_DEBUG("SIMIX_execution_finished: execution canceled");
455         SMX_EXCEPTION(simcall->issuer, cancel_error, 0, "Canceled");
456         break;
457
458       default:
459         xbt_die("Internal error in SIMIX_execution_finish: unexpected action state %d",
460             (int)action->state);
461     }
462     /* check if the host is down */
463     if (surf_workstation_model->extension.
464         workstation.get_state(simcall->issuer->smx_host->host) != SURF_RESOURCE_ON) {
465       simcall->issuer->context->iwannadie = 1;
466     }
467
468     simcall->issuer->waiting_action =    NULL;
469     simcall->host_execution_wait.result = action->state;
470     SIMIX_simcall_answer(simcall);
471   }
472
473   /* We no longer need it */
474   SIMIX_host_execution_destroy(action);
475 }
476
477 void SIMIX_post_host_execute(smx_action_t action)
478 {
479   if (action->type == SIMIX_ACTION_EXECUTE && /* FIMXE: handle resource failure
480                                                * for parallel tasks too */
481       surf_workstation_model->extension.workstation.get_state(action->execution.host->host) == SURF_RESOURCE_OFF) {
482     /* If the host running the action failed, notice it so that the asking
483      * process can be killed if it runs on that host itself */
484     action->state = SIMIX_FAILED;
485   } else if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) {
486     /* If the host running the action didn't fail, then the action was
487      * canceled */
488     action->state = SIMIX_CANCELED;
489   } else {
490     action->state = SIMIX_DONE;
491   }
492
493   if (action->execution.surf_exec) {
494     surf_workstation_model->action_unref(action->execution.surf_exec);
495     action->execution.surf_exec = NULL;
496   }
497
498   /* If there are simcalls associated with the action, then answer them */
499   if (xbt_fifo_size(action->simcalls)) {
500     SIMIX_execution_finish(action);
501   }
502 }
503
504
#ifdef HAVE_TRACING
/**
 * \brief Sets the tracing category of a running action.
 *
 * Only actions in state SIMIX_RUNNING are updated, and only execution and
 * communication actions are handled; anything else is left untouched.
 *
 * \param action the action to categorize
 * \param category the category passed to the SURF model
 */
void SIMIX_set_category(smx_action_t action, const char *category)
{
  if (action->state == SIMIX_RUNNING) {
    if (action->type == SIMIX_ACTION_EXECUTE) {
      surf_workstation_model->set_category(action->execution.surf_exec, category);
    } else if (action->type == SIMIX_ACTION_COMMUNICATE) {
      surf_workstation_model->set_category(action->comm.surf_comm, category);
    }
  }
}
#endif
516