3 /* smpi_coll.c -- various optimized routing for collectives */
5 /* Copyright (c) 2009 Stephane Genaud. */
6 /* All rights reserved. */
8 /* This program is free software; you can redistribute it and/or modify it
9 * * under the terms of the license (GNU LGPL) which comes with this package. */
19 /* proctree taken and translated from P2P-MPI */
30 typedef struct proc_tree * proc_tree_t;
35 void build_tree( int index, int extent, proc_tree_t *tree);
36 proc_tree_t alloc_tree( int arity );
37 void free_tree( proc_tree_t tree);
38 void print_tree(proc_tree_t tree);
39 int binomial_tree_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm);
46 proc_tree_t alloc_tree( int arity ) {
47 proc_tree_t tree = malloc(1*sizeof(struct proc_tree));
50 tree->PROCTREE_A = arity;
52 tree->numChildren = 0;
53 tree->child = malloc(arity*sizeof(int));
54 for (i=0; i < arity; i++) {
65 void free_tree( proc_tree_t tree) {
73 * Build the tree depending on a process rank (index) and the group size (extent)
74 * @param index the rank of the calling process
75 * @param extent the total number of processes
77 void build_tree( int index, int extent, proc_tree_t *tree) {
78 int places = (*tree)->PROCTREE_A * index;
86 for (i = 1; i <= (*tree)->PROCTREE_A; i++) {
88 ch = (*tree)->PROCTREE_A * index + i + (*tree)->root;
89 //printf("places %d\n",places);
91 if (places < extent) {
92 //printf("ch <%d> = <%d>\n",i,ch);
93 //printf("adding to the tree at index <%d>\n\n",i-1);
94 (*tree)->child[i - 1] = ch;
95 (*tree)->numChildren++;
98 //fprintf(stderr,"not adding to the tree\n");
101 //fprintf(stderr,"procTree.numChildren <%d>\n",(*tree)->numChildren);
103 if (index == (*tree)->root) {
108 pr = (index - 1) / (*tree)->PROCTREE_A;
109 (*tree)->parent = pr;
114 void print_tree(proc_tree_t tree) {
117 if (-1 != tree->parent ) {
118 printf("[%d]\n +---",tree->parent);
124 printf("<%d>\n",tree->me);
125 for (i=0;i < tree->numChildren; i++) {
126 printf("%s +--- %d\n", spacer,tree->child[i]);
134 int binomial_tree_bcast(void *buf, int count, MPI_Datatype datatype, int root,
137 int system_tag = 999; // used negative int but smpi_create_request() declares this illegal (to be checked)
139 int retval = MPI_SUCCESS;
141 smpi_mpi_request_t request;
142 smpi_mpi_request_t * requests;
145 rank = smpi_mpi_comm_rank(comm);
146 proc_tree_t tree = alloc_tree( 2 ); // arity=2: a binomial tree
148 build_tree( rank, comm->size, &tree );
149 /* wait for data from my parent in the tree */
152 printf("[%d] recv(%d from %d, tag=%d)\n",rank,rank, tree->parent, system_tag+rank);
154 retval = smpi_create_request(buf, count, datatype,
158 if (MPI_SUCCESS != retval) {
159 printf("** internal error: smpi_create_request() rank=%d returned retval=%d, %s:%d\n",
160 rank,retval,__FILE__,__LINE__);
162 smpi_mpi_irecv(request);
164 printf("[%d] waiting on irecv from %d\n",rank , tree->parent);
166 smpi_mpi_wait(request, MPI_STATUS_IGNORE);
169 tmpbufs = xbt_malloc( tree->numChildren * sizeof(void *));
170 requests = xbt_malloc( tree->numChildren * sizeof(smpi_mpi_request_t));
172 printf("[%d] creates %d requests\n",rank,tree->numChildren);
175 /* iniates sends to ranks lower in the tree */
176 for (i=0; i < tree->numChildren; i++) {
177 if (tree->child[i] != -1) {
179 printf("[%d] send(%d->%d, tag=%d)\n",rank,rank, tree->child[i], system_tag+tree->child[i]);
181 tmpbufs[i] = xbt_malloc( count * datatype->size);
182 memcpy( tmpbufs[i], buf, count * datatype->size * sizeof(char));
183 retval = smpi_create_request(tmpbufs[i], count, datatype,
184 rank, tree->child[i],
185 system_tag + tree->child[i],
186 comm, &(requests[i]));
188 printf("[%d] after create req[%d]=%p req->(src=%d,dst=%d)\n",rank , i, requests[i],requests[i]->src,requests[i]->dst );
190 if (MPI_SUCCESS != retval) {
191 printf("** internal error: smpi_create_request() rank=%d returned retval=%d, %s:%d\n",
192 rank,retval,__FILE__,__LINE__);
194 smpi_mpi_isend(requests[i]);
195 /* FIXME : we should not wait immediately here. See next FIXME. */
196 smpi_mpi_wait( requests[i], MPI_STATUS_IGNORE);
197 xbt_free(tmpbufs[i]);
198 xbt_free(requests[i]);
201 /* FIXME : normally, we sould wait only once all isend have been issued:
202 * this is the following commented code. It deadlocks, probably because
203 * of a bug in the sender process */
205 /* wait for completion of sends */
206 /* printf("[%d] wait for %d send completions\n",rank,tree->numChildren);
207 smpi_mpi_waitall( tree->numChildren, requests, MPI_STATUS_IGNORE);
208 printf("[%d] reqs completed\n)",rank);
226 for (rank=0;rank<size;rank++) {
227 printf("--------------tree for rank %d ----------\n",rank);
228 tree = alloc_tree( 2 );
229 build_tree( rank, size, &tree );
234 printf("-------------- bcast ----------\n");
235 for (rank=0;rank<size;rank++) {