Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
refactoring smpi into multiple source files.
[simgrid.git] / src / smpi / smpi_base.c
1 #include <stdio.h>
2 #include <signal.h>
3 #include <sys/time.h>
4
5 #include "private.h"
6
// Simulator-wide SMPI state; allocated by smpi_global_init() and released by
// smpi_global_destroy().
SMPI_Global_t     smpi_global     = NULL;

// MPI-level state (world communicator, datatypes, operations); allocated by
// the process running on the first host in smpi_mpi_init() and released by
// the last process going through smpi_mpi_finalize().
SMPI_MPI_Global_t smpi_mpi_global = NULL;

// NOTE(review): inside a C string literal "\ref" begins with the "\r" escape
// (carriage return), so the runtime description does not contain the text
// "\ref SMPI_API" -- confirm whether the single backslash is intentional.
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi,XBT_LOG_ROOT_CAT, "All SMPI categories (see \ref SMPI_API)");
12
13 int inline smpi_mpi_comm_size(smpi_mpi_communicator_t *comm)
14 {
15         return comm->size;
16 }
17
18 // FIXME: smarter algorithm?
19 int smpi_mpi_comm_rank(smpi_mpi_communicator_t *comm, smx_host_t host)
20 {
21         int i;
22
23         for(i = comm->size - 1; i > 0 && host != comm->simdata->hosts[i]; i--);
24
25         return i;
26 }
27
28 int inline smpi_mpi_comm_rank_self(smpi_mpi_communicator_t *comm)
29 {
30         return smpi_mpi_comm_rank(comm, SIMIX_host_self());
31 }
32
33 void *smpi_request_new()
34 {
35         smpi_mpi_request_t *request = xbt_new(smpi_mpi_request_t, 1);
36
37         request->completed = 0;
38         request->simdata            = xbt_new(s_smpi_mpi_request_simdata_t, 1);
39         request->simdata->mutex     = SIMIX_mutex_init();
40         request->simdata->cond      = SIMIX_cond_init();
41
42         return request;
43 }
44
45 void smpi_request_free(void *pointer)
46 {
47
48         smpi_mpi_request_t *request = pointer;
49
50         if (NULL != request) {
51                 SIMIX_cond_destroy(request->simdata->cond);
52                 SIMIX_mutex_destroy(request->simdata->mutex);
53                 xbt_free(request->simdata);
54                 xbt_free(request);
55         }
56
57         return;
58 }
59
/**
 * Reset callback registered with the request mallocator.
 *
 * Currently a no-op: the object passed in is left untouched.
 *
 * @param pointer request being recycled (unused)
 */
void smpi_request_reset(void *pointer)
{
        (void)pointer;
}
64
65
66 void *smpi_message_new()
67 {
68         return xbt_new(smpi_received_message_t, 1);
69 }
70
71 void smpi_message_free(void *pointer)
72 {
73         if (NULL != pointer) {
74                 xbt_free(pointer);
75         }
76
77         return;
78 }
79
/**
 * Reset callback registered with the message mallocator.
 *
 * Currently a no-op: the object passed in is left untouched.
 *
 * @param pointer message being recycled (unused)
 */
void smpi_message_reset(void *pointer)
{
        (void)pointer;
}
84
85 void smpi_global_init()
86 {
87         int i;
88
89         int size = SIMIX_host_get_number();
90
91         smpi_global = xbt_new(s_SMPI_Global_t, 1);
92
93         // config variable
94         smpi_global->reference_speed                     = SMPI_DEFAULT_SPEED;
95
96         smpi_global->root_ready                          = 0;
97         smpi_global->ready_process_count                 = 0;
98
99         // start/stop
100         smpi_global->start_stop_mutex                    = SIMIX_mutex_init();
101         smpi_global->start_stop_cond                     = SIMIX_cond_init();
102
103         // processes
104         smpi_global->sender_processes                    = xbt_new(smx_process_t, size);
105         smpi_global->receiver_processes                  = xbt_new(smx_process_t, size);
106
107         // running hosts
108         smpi_global->running_hosts_count_mutex           = SIMIX_mutex_init();
109         smpi_global->running_hosts_count                 = 0;
110
111         // mallocators
112         smpi_global->request_mallocator                  = xbt_mallocator_new(SMPI_REQUEST_MALLOCATOR_SIZE,
113                                                              smpi_request_new, smpi_request_free, smpi_request_reset);
114         smpi_global->message_mallocator                  = xbt_mallocator_new(SMPI_MESSAGE_MALLOCATOR_SIZE,
115                                                              smpi_message_new, smpi_message_free, smpi_message_reset);
116
117         //
118         smpi_global->pending_send_request_queues         = xbt_new(xbt_fifo_t,  size);
119         smpi_global->pending_send_request_queues_mutexes = xbt_new(smx_mutex_t, size);
120         smpi_global->pending_recv_request_queues         = xbt_new(xbt_fifo_t,  size);
121         smpi_global->pending_recv_request_queues_mutexes = xbt_new(smx_mutex_t, size);
122         smpi_global->received_message_queues             = xbt_new(xbt_fifo_t,  size);
123         smpi_global->received_message_queues_mutexes     = xbt_new(smx_mutex_t, size);
124         smpi_global->timers                              = xbt_new(xbt_os_timer_t, size);
125         smpi_global->timers_mutexes                      = xbt_new(smx_mutex_t, size);
126
127         for(i = 0; i < size; i++) {
128                 smpi_global->pending_send_request_queues[i]         = xbt_fifo_new();
129                 smpi_global->pending_send_request_queues_mutexes[i] = SIMIX_mutex_init();
130                 smpi_global->pending_recv_request_queues[i]         = xbt_fifo_new();
131                 smpi_global->pending_recv_request_queues_mutexes[i] = SIMIX_mutex_init();
132                 smpi_global->received_message_queues[i]             = xbt_fifo_new();
133                 smpi_global->received_message_queues_mutexes[i]     = SIMIX_mutex_init();
134                 smpi_global->timers[i]                              = xbt_os_timer_new();
135                 smpi_global->timers_mutexes[i]                      = SIMIX_mutex_init();
136         }
137
138 }
139
140 void smpi_global_destroy()
141 {
142         int i;
143
144         int size = SIMIX_host_get_number();
145
146         // start/stop
147         SIMIX_mutex_destroy(smpi_global->start_stop_mutex);
148         SIMIX_cond_destroy(smpi_global->start_stop_cond);
149
150         // processes
151         xbt_free(smpi_global->sender_processes);
152         xbt_free(smpi_global->receiver_processes);
153
154         // running hosts
155         SIMIX_mutex_destroy(smpi_global->running_hosts_count_mutex);
156
157         // mallocators
158         xbt_mallocator_free(smpi_global->request_mallocator);
159         xbt_mallocator_free(smpi_global->message_mallocator);
160
161         for(i = 0; i < size; i++) {
162                 xbt_fifo_free(smpi_global->pending_send_request_queues[i]);
163                 SIMIX_mutex_destroy(smpi_global->pending_send_request_queues_mutexes[i]);
164                 xbt_fifo_free(smpi_global->pending_recv_request_queues[i]);
165                 SIMIX_mutex_destroy(smpi_global->pending_recv_request_queues_mutexes[i]);
166                 xbt_fifo_free(smpi_global->received_message_queues[i]);
167                 SIMIX_mutex_destroy(smpi_global->received_message_queues_mutexes[i]);
168                 xbt_os_timer_free(smpi_global->timers[i]);
169                 SIMIX_mutex_destroy(smpi_global->timers_mutexes[i]);
170         }
171
172         xbt_free(smpi_global->pending_send_request_queues);
173         xbt_free(smpi_global->pending_send_request_queues_mutexes);
174         xbt_free(smpi_global->pending_recv_request_queues);
175         xbt_free(smpi_global->pending_recv_request_queues_mutexes);
176         xbt_free(smpi_global->received_message_queues);
177         xbt_free(smpi_global->received_message_queues_mutexes);
178         xbt_free(smpi_global->timers);
179         xbt_free(smpi_global->timers_mutexes);
180
181         xbt_free(smpi_global);
182 }
183
184 int smpi_run_simulation(int argc, char **argv)
185 {
186         xbt_fifo_item_t cond_item   = NULL;
187         smx_cond_t   cond           = NULL;
188         smx_action_t action         = NULL;
189
190         xbt_fifo_t   actions_failed = xbt_fifo_new();
191         xbt_fifo_t   actions_done   = xbt_fifo_new();
192
193         srand(SMPI_RAND_SEED);
194
195         SIMIX_global_init(&argc, argv);
196
197         SIMIX_function_register("smpi_simulated_main", smpi_simulated_main);
198         SIMIX_function_register("smpi_sender",         smpi_sender);
199         SIMIX_function_register("smpi_receiver",       smpi_receiver);
200
201         // FIXME: ought to verify these files...
202         SIMIX_create_environment(argv[1]);
203
204         // must initialize globals between creating environment and launching app....
205         smpi_global_init();
206
207         SIMIX_launch_application(argv[2]);
208
209         /* Prepare to display some more info when dying on Ctrl-C pressing */
210         // FIXME: doesn't work
211         //signal(SIGINT, inthandler);
212
213         /* Clean IO before the run */
214         fflush(stdout);
215         fflush(stderr);
216
217         while (SIMIX_solve(actions_done, actions_failed) != -1.0) {
218                 while ((action = xbt_fifo_pop(actions_failed))) {
219                         DEBUG1("** %s failed **", action->name);
220                         xbt_fifo_foreach(action->cond_list, cond_item, cond, smx_cond_t) {
221                                 SIMIX_cond_broadcast(cond);
222                                 SIMIX_unregister_action_to_condition(action, cond);
223                         }
224                         SIMIX_action_destroy(action);
225                 }
226                 while ((action = xbt_fifo_pop(actions_done))) {
227                         DEBUG1("** %s done **",action->name);
228                         xbt_fifo_foreach(action->cond_list, cond_item, cond, smx_cond_t) {
229                                 SIMIX_cond_broadcast(cond);
230                                 SIMIX_unregister_action_to_condition(action, cond);
231                         }
232                         SIMIX_action_destroy(action);
233                 }
234         }
235
236         xbt_fifo_free(actions_failed);
237         xbt_fifo_free(actions_done);
238
239         INFO1("simulation time %g", SIMIX_get_clock());
240
241         smpi_global_destroy();
242
243         SIMIX_clean();
244
245         return 0;
246 }
247
/**
 * Logical-AND reduction on two ints.
 *
 * @param x points to the first int operand
 * @param y points to the second int operand (read only when *x is non-zero)
 * @param z points to the int that receives 1 when both operands are
 *          non-zero and 0 otherwise
 */
void smpi_mpi_land_func(void *x, void *y, void *z)
{
        const int *lhs = x;
        const int *rhs = y;
        int *out = z;

        *out = (*lhs && *rhs);
}
252
/**
 * Sum reduction on two ints.
 *
 * @param x points to the first int operand
 * @param y points to the second int operand
 * @param z points to the int that receives *x + *y
 */
void smpi_mpi_sum_func(void *x, void *y, void *z)
{
        const int *lhs = x;
        const int *rhs = y;
        int *out = z;

        *out = *lhs + *rhs;
}
257
258
259 void smpi_mpi_init()
260 {
261         smx_process_t process;
262         smx_host_t host;
263         smx_host_t *hosts;
264         int size;
265
266         SIMIX_mutex_lock(smpi_global->running_hosts_count_mutex);
267         smpi_global->running_hosts_count++;
268         SIMIX_mutex_unlock(smpi_global->running_hosts_count_mutex);
269
270         // initialize some local variables
271         process = SIMIX_process_self();
272         host    = SIMIX_host_self();
273         hosts   = SIMIX_host_get_table();
274         size    = SIMIX_host_get_number();
275
276         // node 0 sets the globals
277         if (host == hosts[0]) {
278
279                 smpi_mpi_global                                = xbt_new(s_SMPI_MPI_Global_t, 1);
280
281                 // global communicator
282                 smpi_mpi_global->mpi_comm_world                         = xbt_new(smpi_mpi_communicator_t, 1);
283                 smpi_mpi_global->mpi_comm_world->size                   = size;
284                 smpi_mpi_global->mpi_comm_world->simdata                = xbt_new(s_smpi_mpi_communicator_simdata_t, 1);
285                 smpi_mpi_global->mpi_comm_world->simdata->barrier_count = 0;
286                 smpi_mpi_global->mpi_comm_world->simdata->barrier_mutex = SIMIX_mutex_init();
287                 smpi_mpi_global->mpi_comm_world->simdata->barrier_cond  = SIMIX_cond_init();
288                 smpi_mpi_global->mpi_comm_world->simdata->hosts         = hosts;
289                 smpi_mpi_global->mpi_comm_world->simdata->processes     = xbt_new(smx_process_t, size);
290                 smpi_mpi_global->mpi_comm_world->simdata->processes[0]  = process;
291
292                 // mpi datatypes
293                 smpi_mpi_global->mpi_byte                      = xbt_new(smpi_mpi_datatype_t, 1);
294                 smpi_mpi_global->mpi_byte->size                = (size_t)1;
295                 smpi_mpi_global->mpi_int                       = xbt_new(smpi_mpi_datatype_t, 1);
296                 smpi_mpi_global->mpi_int->size                 = sizeof(int);
297                 smpi_mpi_global->mpi_double                    = xbt_new(smpi_mpi_datatype_t, 1);
298                 smpi_mpi_global->mpi_double->size              = sizeof(double);
299
300                 // mpi operations
301                 smpi_mpi_global->mpi_land                      = xbt_new(smpi_mpi_op_t, 1);
302                 smpi_mpi_global->mpi_land->func                = smpi_mpi_land_func;
303                 smpi_mpi_global->mpi_sum                       = xbt_new(smpi_mpi_op_t, 1);
304                 smpi_mpi_global->mpi_sum->func                 = smpi_mpi_sum_func;
305
306                 // signal all nodes to perform initialization
307                 SIMIX_mutex_lock(smpi_global->start_stop_mutex);
308                 smpi_global->root_ready = 1;
309                 SIMIX_cond_broadcast(smpi_global->start_stop_cond);
310                 SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
311
312         } else {
313
314                 // make sure root is done before own initialization
315                 SIMIX_mutex_lock(smpi_global->start_stop_mutex);
316                 if (!smpi_global->root_ready) {
317                         SIMIX_cond_wait(smpi_global->start_stop_cond, smpi_global->start_stop_mutex);
318                 }
319                 SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
320
321                 smpi_mpi_global->mpi_comm_world->simdata->processes[smpi_mpi_comm_rank_self(smpi_mpi_global->mpi_comm_world)] = process;
322         }
323
324         // wait for all nodes to signal initializatin complete
325         SIMIX_mutex_lock(smpi_global->start_stop_mutex);
326         smpi_global->ready_process_count++;
327         if (smpi_global->ready_process_count < 3 * size) {
328                 SIMIX_cond_wait(smpi_global->start_stop_cond, smpi_global->start_stop_mutex);
329         } else {
330                 SIMIX_cond_broadcast(smpi_global->start_stop_cond);
331         }
332         SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
333
334         return;
335 }
336
337 void smpi_mpi_finalize()
338 {
339         int i;
340
341         SIMIX_mutex_lock(smpi_global->running_hosts_count_mutex);
342         i = --smpi_global->running_hosts_count;
343         SIMIX_mutex_unlock(smpi_global->running_hosts_count_mutex);
344
345         SIMIX_mutex_lock(smpi_global->start_stop_mutex);
346         smpi_global->ready_process_count--;
347         SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
348
349         if (0 >= i) {
350
351                 // wake up senders/receivers
352                 for (i = 0; i < smpi_mpi_global->mpi_comm_world->size; i++) {
353                         if (SIMIX_process_is_suspended(smpi_global->sender_processes[i])) {
354                                 SIMIX_process_resume(smpi_global->sender_processes[i]);
355                         }
356                         if (SIMIX_process_is_suspended(smpi_global->receiver_processes[i])) {
357                                 SIMIX_process_resume(smpi_global->receiver_processes[i]);
358                         }
359                 }
360
361                 // wait for senders/receivers to exit...
362                 SIMIX_mutex_lock(smpi_global->start_stop_mutex);
363                 if (smpi_global->ready_process_count > 0) {
364                         SIMIX_cond_wait(smpi_global->start_stop_cond, smpi_global->start_stop_mutex);
365                 }
366                 SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
367
368                 SIMIX_mutex_destroy(smpi_mpi_global->mpi_comm_world->simdata->barrier_mutex);
369                 SIMIX_cond_destroy(smpi_mpi_global->mpi_comm_world->simdata->barrier_cond);
370                 xbt_free(smpi_mpi_global->mpi_comm_world->simdata->processes);
371                 xbt_free(smpi_mpi_global->mpi_comm_world->simdata);
372                 xbt_free(smpi_mpi_global->mpi_comm_world);
373
374                 xbt_free(smpi_mpi_global->mpi_byte);
375                 xbt_free(smpi_mpi_global->mpi_int);
376                 xbt_free(smpi_mpi_global->mpi_double);
377
378                 xbt_free(smpi_mpi_global->mpi_land);
379                 xbt_free(smpi_mpi_global->mpi_sum);
380
381                 xbt_free(smpi_mpi_global);
382         }
383
384 }
385
386 // FIXME: could cause trouble with multithreaded procs on same host...
387 void smpi_bench_begin()
388 {
389         int rank = smpi_mpi_comm_rank_self(smpi_mpi_global->mpi_comm_world);
390         SIMIX_mutex_lock(smpi_global->timers_mutexes[rank]);
391         xbt_os_timer_start(smpi_global->timers[rank]);
392         return;
393 }
394
395 void smpi_bench_end()
396 {
397         int rank = smpi_mpi_comm_rank_self(smpi_mpi_global->mpi_comm_world);
398         double duration;
399         smx_host_t host;
400         smx_action_t compute_action;
401         smx_mutex_t mutex;
402         smx_cond_t cond;
403
404         xbt_os_timer_stop(smpi_global->timers[rank]);
405
406         duration       = xbt_os_timer_elapsed(smpi_global->timers[rank]);
407         SIMIX_mutex_unlock(smpi_global->timers_mutexes[rank]);
408
409         host           = smpi_mpi_global->mpi_comm_world->simdata->hosts[rank];
410         compute_action = SIMIX_action_execute(host, NULL, duration * SMPI_DEFAULT_SPEED);
411         mutex          = SIMIX_mutex_init();
412         cond           = SIMIX_cond_init();
413
414         SIMIX_mutex_lock(mutex);
415         SIMIX_register_action_to_condition(compute_action, cond);
416         SIMIX_cond_wait(cond, mutex);
417         //SIMIX_unregister_action_to_condition(compute_action, cond);
418         SIMIX_mutex_unlock(mutex);
419
420         SIMIX_mutex_destroy(mutex);
421         SIMIX_cond_destroy(cond);
422
423         // FIXME: check for success/failure?
424
425         return;
426 }
427
428 void smpi_barrier(smpi_mpi_communicator_t *comm)
429 {
430
431         SIMIX_mutex_lock(comm->simdata->barrier_mutex);
432         if(++comm->simdata->barrier_count < comm->size) {
433                 SIMIX_cond_wait(comm->simdata->barrier_cond, comm->simdata->barrier_mutex);
434         } else {
435                 comm->simdata->barrier_count = 0;
436                 SIMIX_cond_broadcast(comm->simdata->barrier_cond);
437         }
438         SIMIX_mutex_unlock(comm->simdata->barrier_mutex);
439
440         return;
441 }
442
443 // FIXME: smarter algorithm...
444 int smpi_comm_rank(smpi_mpi_communicator_t *comm, smx_host_t host)
445 {
446         int i;
447         for(i = 0; i < comm->size && host != comm->simdata->hosts[i]; i++);
448         if (i >= comm->size) i = -1;
449         return i;
450 }
451
452 int smpi_create_request(void *buf, int count, smpi_mpi_datatype_t *datatype,
453         int src, int dst, int tag, smpi_mpi_communicator_t *comm, smpi_mpi_request_t **request)
454 {
455         int retval = MPI_SUCCESS;
456
457         *request = NULL;
458
459         if (0 > count) {
460                 retval = MPI_ERR_COUNT;
461         } else if (NULL == buf) {
462                 retval = MPI_ERR_INTERN;
463         } else if (NULL == datatype) {
464                 retval = MPI_ERR_TYPE;
465         } else if (NULL == comm) {
466                 retval = MPI_ERR_COMM;
467         } else if (MPI_ANY_SOURCE != src && (0 > src || comm->size <= src)) {
468                 retval = MPI_ERR_RANK;
469         } else if (0 > dst || comm->size <= dst) {
470                 retval = MPI_ERR_RANK;
471         } else if (0 > tag) {
472                 retval = MPI_ERR_TAG;
473         } else {
474                 *request = xbt_mallocator_get(smpi_global->request_mallocator);
475                 (*request)->comm       = comm;
476                 (*request)->src        = src;
477                 (*request)->dst        = dst;
478                 (*request)->tag        = tag;
479                 (*request)->buf        = buf;
480                 (*request)->count      = count;
481                 (*request)->datatype   = datatype;
482         }
483         return retval;
484 }
485
486 int smpi_isend(smpi_mpi_request_t *request)
487 {
488         int retval = MPI_SUCCESS;
489         int rank   = smpi_mpi_comm_rank_self(smpi_mpi_global->mpi_comm_world);
490
491         if (NULL != request) {
492                 SIMIX_mutex_lock(smpi_global->pending_send_request_queues_mutexes[rank]);
493                 xbt_fifo_push(smpi_global->pending_send_request_queues[rank], request);
494                 SIMIX_mutex_unlock(smpi_global->pending_send_request_queues_mutexes[rank]);
495         }
496
497         if (SIMIX_process_is_suspended(smpi_global->sender_processes[rank])) {
498                 SIMIX_process_resume(smpi_global->sender_processes[rank]);
499         }
500
501         return retval;
502 }
503
504 int smpi_irecv(smpi_mpi_request_t *request)
505 {
506         int retval = MPI_SUCCESS;
507         int rank = smpi_mpi_comm_rank_self(smpi_mpi_global->mpi_comm_world);
508
509         if (NULL != request) {
510                 SIMIX_mutex_lock(smpi_global->pending_recv_request_queues_mutexes[rank]);
511                 xbt_fifo_push(smpi_global->pending_recv_request_queues[rank], request);
512                 SIMIX_mutex_unlock(smpi_global->pending_recv_request_queues_mutexes[rank]);
513         }
514
515         if (SIMIX_process_is_suspended(smpi_global->receiver_processes[rank])) {
516                 SIMIX_process_resume(smpi_global->receiver_processes[rank]);
517         }
518
519         return retval;
520 }
521
522 void smpi_wait(smpi_mpi_request_t *request, smpi_mpi_status_t *status)
523 {
524         if (NULL != request) {
525                 SIMIX_mutex_lock(request->simdata->mutex);
526                 if (!request->completed) {
527                         SIMIX_cond_wait(request->simdata->cond, request->simdata->mutex);
528                 }
529                 if (NULL != status) {
530                         status->MPI_SOURCE = request->src;
531                 }
532                 SIMIX_mutex_unlock(request->simdata->mutex);
533         }
534 }
535
/**
 * gettimeofday() replacement reporting the simulated clock.
 *
 * Benchmarking is stopped around the call (smpi_bench_end() before,
 * smpi_bench_begin() after), including when tv is NULL.
 *
 * @param tv receives SIMIX_get_clock() split into seconds and microseconds
 * @param tz unused
 * @return 0 on success, -1 when tv is NULL
 */
// FIXME: move into own file
int smpi_gettimeofday(struct timeval *tv, struct timezone *tz)
{
        int rc = -1;

        smpi_bench_end();

        if (tv != NULL) {
                const double clock_now = SIMIX_get_clock();

                // whole seconds first, then the remaining fraction in microseconds
                tv->tv_sec  = clock_now;
                tv->tv_usec = ((clock_now - (double)tv->tv_sec) * 1000000.0);
                rc = 0;
        }

        smpi_bench_begin();

        return rc;
}
552
553 unsigned int smpi_sleep(unsigned int seconds)
554 {
555         smx_mutex_t mutex;
556         smx_cond_t cond;
557         smx_host_t host;
558         smx_action_t sleep_action;
559
560         smpi_bench_end();
561         host         = SIMIX_host_self();
562         sleep_action = SIMIX_action_sleep(host, seconds);
563         mutex        = SIMIX_mutex_init();
564         cond         = SIMIX_cond_init();
565
566         SIMIX_mutex_lock(mutex);
567         SIMIX_register_action_to_condition(sleep_action, cond);
568         SIMIX_cond_wait(cond, mutex);
569         //SIMIX_unregister_action_to_condition(sleep_action, cond);
570         SIMIX_mutex_unlock(mutex);
571
572         SIMIX_mutex_destroy(mutex);
573         SIMIX_cond_destroy(cond);
574
575         // FIXME: check for success/failure?
576
577         smpi_bench_begin();
578         return 0;
579 }
580
581 void smpi_exit(int status)
582 {
583         smpi_bench_end();
584         SIMIX_mutex_lock(smpi_global->running_hosts_count_mutex);
585         smpi_global->running_hosts_count--;
586         SIMIX_mutex_unlock(smpi_global->running_hosts_count_mutex);
587         SIMIX_process_kill(SIMIX_process_self());
588         return;
589 }