Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
* added support for optimized collectives:
[simgrid.git] / src / smpi / smpi_coll.c
1 /* $Id$tag */
2
3 /* smpi_coll.c -- various optimized routing for collectives                   */
4
5 /* Copyright (c) 2009 Stephane Genaud.                                        */
6 /* All rights reserved.                                                       */
7
8 /* This program is free software; you can redistribute it and/or modify it
9  *  * under the terms of the license (GNU LGPL) which comes with this package. */
10
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #include "private.h"
17
18
19 /* proctree taken and translated from P2P-MPI */
20
21 struct proc_tree {
22         int PROCTREE_A;
23         int numChildren;
24         int * child;
25         int parent;
26         int me;
27         int root;
28         int isRoot;
29 };
30 typedef struct proc_tree * proc_tree_t;
31
32
33
34 /* prototypes */
35 void build_tree( int index, int extent, proc_tree_t *tree);
36 proc_tree_t alloc_tree( int arity );
37 void free_tree( proc_tree_t tree);
38 void print_tree(proc_tree_t tree);
39 int binomial_tree_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm);
40
41
42
43 /**
44  * alloc and init
45  **/
46 proc_tree_t alloc_tree( int arity ) {
47         proc_tree_t tree = malloc(1*sizeof(struct proc_tree));
48         int i;
49
50         tree->PROCTREE_A = arity;
51         tree->isRoot = 0; 
52         tree->numChildren = 0;
53         tree->child = malloc(arity*sizeof(int));
54         for (i=0; i < arity; i++) {
55                 tree->child[i] = -1;
56         }
57         tree->root = -1;
58         tree->parent = -1;
59         return( tree );
60 }
61
62 /**
63  * free
64  **/
65 void free_tree( proc_tree_t tree) {
66         free (tree->child );
67         free(tree);
68 }
69
70
71
72 /**
73  * Build the tree depending on a process rank (index) and the group size (extent)
74  * @param index the rank of the calling process
75  * @param extent the total number of processes
76  **/
77 void build_tree( int index, int extent, proc_tree_t *tree) {
78         int places = (*tree)->PROCTREE_A * index;
79         int i;
80         int ch;
81         int pr;
82
83         (*tree)->me = index;
84         (*tree)->root = 0 ;
85
86         for (i = 1; i <= (*tree)->PROCTREE_A; i++) {
87                 ++places;
88                 ch = (*tree)->PROCTREE_A * index + i + (*tree)->root;
89                 //printf("places %d\n",places);
90                 ch %= extent;
91                 if (places < extent) {
92                          //printf("ch <%d> = <%d>\n",i,ch);
93                          //printf("adding to the tree at index <%d>\n\n",i-1);
94                         (*tree)->child[i - 1] = ch;
95                         (*tree)->numChildren++;
96                 }
97                 else {
98                        //fprintf(stderr,"not adding to the tree\n");
99                 }
100         }
101         //fprintf(stderr,"procTree.numChildren <%d>\n",(*tree)->numChildren);
102
103         if (index == (*tree)->root) {
104                 (*tree)->isRoot = 1;
105         }
106         else {
107                 (*tree)->isRoot = 0;
108                 pr = (index - 1) / (*tree)->PROCTREE_A;
109                 (*tree)->parent = pr;
110         }
111 }
112
113
114 void print_tree(proc_tree_t tree) {
115       int i;
116       char *spacer;
117       if (-1 != tree->parent ) {
118             printf("[%d]\n   +---",tree->parent);
119             spacer= strdup("     ");
120       }
121       else {
122             spacer=strdup("");
123       }
124       printf("<%d>\n",tree->me);
125       for (i=0;i < tree->numChildren; i++) {
126               printf("%s   +--- %d\n", spacer,tree->child[i]);
127       }
128       free(spacer);
129 }
130       
131 /**
132  * bcast
133  **/
134 int binomial_tree_bcast(void *buf, int count, MPI_Datatype datatype, int root,
135                 MPI_Comm comm)
136 {
137         int system_tag = 999;  // used negative int but smpi_create_request() declares this illegal (to be checked)
138         int rank;
139         int retval = MPI_SUCCESS;
140         int i;
141         smpi_mpi_request_t request;
142         smpi_mpi_request_t * requests;
143         void **tmpbufs; 
144
145         rank = smpi_mpi_comm_rank(comm);
146         proc_tree_t tree = alloc_tree( 2 ); // arity=2: a binomial tree
147
148         build_tree( rank, comm->size, &tree );
149         /* wait for data from my parent in the tree */
150         if (!tree->isRoot) {
151 #ifdef DEBUG_STEPH
152                 printf("[%d] recv(%d  from %d, tag=%d)\n",rank,rank, tree->parent, system_tag+rank);
153 #endif
154                 retval = smpi_create_request(buf, count, datatype, 
155                                 tree->parent, rank, 
156                                 system_tag + rank, 
157                                 comm, &request);
158                 if (MPI_SUCCESS != retval) {
159                         printf("** internal error: smpi_create_request() rank=%d returned retval=%d, %s:%d\n",
160                                         rank,retval,__FILE__,__LINE__);
161                 }
162                 smpi_mpi_irecv(request);
163 #ifdef DEBUG_STEPH
164                 printf("[%d] waiting on irecv from %d\n",rank , tree->parent);
165 #endif
166                 smpi_mpi_wait(request, MPI_STATUS_IGNORE);
167         }
168
169         tmpbufs  = xbt_malloc( tree->numChildren * sizeof(void *)); 
170         requests = xbt_malloc( tree->numChildren * sizeof(smpi_mpi_request_t));
171 #ifdef DEBUG_STEPH
172         printf("[%d] creates %d requests\n",rank,tree->numChildren);
173 #endif
174
175         /* iniates sends to ranks lower in the tree */
176         for (i=0; i < tree->numChildren; i++) {
177                 if (tree->child[i] != -1) {
178 #ifdef DEBUG_STEPH
179                         printf("[%d] send(%d->%d, tag=%d)\n",rank,rank, tree->child[i], system_tag+tree->child[i]);
180 #endif
181                         tmpbufs[i] =  xbt_malloc( count * datatype->size);
182                         memcpy( tmpbufs[i], buf, count * datatype->size * sizeof(char));
183                         retval = smpi_create_request(tmpbufs[i], count, datatype, 
184                                         rank, tree->child[i], 
185                                         system_tag + tree->child[i], 
186                                         comm, &(requests[i]));
187 #ifdef DEBUG_STEPH
188                         printf("[%d] after create req[%d]=%p req->(src=%d,dst=%d)\n",rank , i, requests[i],requests[i]->src,requests[i]->dst );
189 #endif
190                         if (MPI_SUCCESS != retval) {
191                               printf("** internal error: smpi_create_request() rank=%d returned retval=%d, %s:%d\n",
192                                               rank,retval,__FILE__,__LINE__);
193                         }
194                         smpi_mpi_isend(requests[i]);
195                         /* FIXME : we should not wait immediately here. See next FIXME. */
196                         smpi_mpi_wait( requests[i], MPI_STATUS_IGNORE);
197                         xbt_free(tmpbufs[i]);
198                         xbt_free(requests[i]);
199                 }
200         }
201         /* FIXME : normally, we sould wait only once all isend have been issued:
202          * this is the following commented code. It deadlocks, probably because
203          * of a bug in the sender process */
204         
205         /* wait for completion of sends */
206         /* printf("[%d] wait for %d send completions\n",rank,tree->numChildren);
207           smpi_mpi_waitall( tree->numChildren, requests, MPI_STATUS_IGNORE);
208          printf("[%d] reqs completed\n)",rank);
209            */
210         
211         xbt_free(tmpbufs);
212         xbt_free(requests);
213         return(retval);
214 }
215
216 /**
217  * example usage
218  **/
219 /*
220  * int main() {
221
222       int rank; 
223       int size=12;
224
225       proc_tree_t tree;
226       for (rank=0;rank<size;rank++) {
227             printf("--------------tree for rank %d ----------\n",rank);
228             tree = alloc_tree( 2 );
229             build_tree( rank, size, &tree );
230             print_tree( tree );
231             free_tree( tree );
232    
233       }
234       printf("-------------- bcast ----------\n");
235       for (rank=0;rank<size;rank++) {
236               bcast( rank, size );
237       }
238
239
240 }
241 */
242
243                 
244