12 // sandler says, compile with mpicc -v alltoalldemo.c
13 // run with mpirun -np 3 a.out -m 5
15 int main( int argc, char *argv[] )
// NOTE(review): this listing is incomplete -- the embedded line numbers jump
// (14, 16-19, 21-24, 28, ...), so the declarations of i, chunk, sb, rb,
// status, gstatus, several closing braces, the "-m" option letter test,
// free(), and MPI_Finalize() are on lines not shown here. The comments
// below describe only what the visible lines demonstrate.
20 int j; // added by sandler
// Standard MPI startup: initialize the library, then query this process's
// rank and the total number of processes in MPI_COMM_WORLD.
25 MPI_Init(&argc,&argv);
26 MPI_Comm_rank(MPI_COMM_WORLD,&rank);
27 MPI_Comm_size(MPI_COMM_WORLD,&size);
29 printf("size: %d\n", size);
// Command-line parsing: every argument must start with '-'; the "-m <n>"
// option (the option-letter check itself is on a line not shown) sets
// `chunk`, the number of ints each rank sends to every other rank.
31 for ( i=1 ; i < argc ; ++i ) {
32 if ( argv[i][0] != '-' ) {
34 fprintf(stderr, "Unrecognized option %s\n", argv[i]);fflush(stderr);
// NOTE(review): atoi() performs no error checking -- a non-numeric argument
// silently yields chunk == 0; strtol() would allow detection.
39 chunk = atoi(argv[++i]);
41 printf("chunk: %d\n", chunk);
45 fprintf(stderr, "Unrecognized argument %s\n", argv[i]);fflush(stderr);
46 MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE);
// Allocate the send and receive buffers: size*chunk ints each, i.e. one
// chunk-sized row per peer rank. On allocation failure the whole parallel
// job is torn down with MPI_Abort (a plain exit() would hang other ranks).
49 sb = (int *)malloc(size*chunk*sizeof(int));
51 perror( "can't allocate send buffer" );fflush(stderr);
52 MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE);
54 rb = (int *)malloc(size*chunk*sizeof(int));
56 perror( "can't allocate recv buffer");fflush(stderr);
58 MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
62 /* original deino.net:
63 for ( i=0 ; i < size*chunk ; ++i ) {
// NOTE(review): "sb[i] = sb[i]" in the quoted original is almost certainly a
// typo for "sb[i] = rb[i] = rank + 1;" -- verify against the upstream
// deino.net source before relying on this quotation.
64 sb[i] = sb[i] = rank + 1;
// Explanatory banner, printed once by rank 0: MPI_Alltoall is a row
// transpose across ranks -- row i of rank r's send buffer becomes row r of
// rank i's receive buffer.
70 if (rank==0) printf("note in the following:\n"
71 "if you were to compare the sending buffer and the receiving buffer on the SAME processor, \n"
72 "you might think that the values were getting wiped out. However, each row IS going somewhere. \n"
73 "The 0th row of processor 0 goes to the 0th row of processor 0\n"
74 "The 1st row of processor 0 goes to the 0th row of processor 1. (Go look at rb for processor 1!)\n"
76 "Too bad the values don't come out in a deterministic order. That's life!\n"
78 "Now look at the receiving buffer for processor 0.\n"
79 "The 0th row is from processor 0 (itself).\n"
80 "The 1st row on processor 0 is from the 0th row on processor 1. (Go look at the sb of processor 1!)\n"
82 "Apparently this is the intended behavior.\n"
84 "Note that each row is always moved as one chunk, unchangeable.\n"
86 "TODO: draw a diagram\n"
// Fill the send buffer so each element encodes its own coordinates:
// hundreds digit = owning rank, tens digit = destination row (peer rank),
// units digit = column within the row. Makes the shuffle easy to eyeball.
89 for (i=0; i<size; i++) {
90 for (j=0; j<chunk; j++) {
91 int offset = i*chunk + j; // note the multiplier is chunk, not size
93 sb[offset] = rank*100 + i*10 + j;
99 // this clearly shows what is NOT intended to be done, in that the rb on a processor is the same as the sb on the processor
100 // in this initialization: on processor 0, only the 0th row gets normal values.
101 // on processor 1, only the 1st row gets normal values.
102 // when you look at the rb, it looks like nothing happened. this is because, say, for processor 1, the 1st row got sent to itself.
// Second initialization pass (visibly identical to the loop above -- the
// difference, if any, is on the missing lines 107-108/110-112) plus a
// pre-fill of rb with the sentinel 999, so any slot NOT overwritten by
// MPI_Alltoall stands out in the printed output.
104 for (i=0; i<size; i++) {
105 for (j=0; j<chunk; j++) {
106 int offset = i*chunk + j; // note the multiplier is chunk, not size
109 sb[offset] = rank*100 + i*10 + j;
113 rb[i*chunk + j] = 999;
// Print this rank's send buffer as a size x chunk grid
// (row i = the chunk bound for rank i).
118 // this does printgrid("sb", rank, size, chunk, sb);
120 printf("[processor %d] To send:\n", rank);
121 for (i=0; i<size; i++) {
122 for (j=0; j<chunk; j++) {
123 // note the multiplier is chunk, not size
124 printf("%03d ", sb[i*chunk+j]);
131 // for another variation, could send out a bunch of characters, like
132 p r o c e s s o r 0 r o w 0
133 p r o c e s s o r 0 r o w 1
134 p r o c e s s o r 0 r o w 2
138 p r o c e s s o r 0 r o w 0
139 p r o c e s s o r 1 r o w 0
140 p r o c e s s o r 2 r o w 0
// The collective exchange: every rank sends `chunk` ints to each rank and
// receives `chunk` ints from each rank, in rank order.
// NOTE(review): under the default MPI_ERRORS_ARE_FATAL handler this call
// either succeeds (returning MPI_SUCCESS) or aborts, so `status` carries
// little information unless the error handler has been changed.
144 status = MPI_Alltoall(sb, chunk, MPI_INT, rb, chunk, MPI_INT, MPI_COMM_WORLD);
// Print the received grid (row i = the chunk that arrived from rank i).
146 // this does printgrid("rb", rank, size, chunk, rb);
148 printf("[processor %d] Received:\n", rank);
149 for (i=0; i<size; i++) {
150 for (j=0; j<chunk; j++) {
151 printf("%03d ", rb[i*chunk+j]);
// Sum every rank's local status so all processes learn whether any rank's
// MPI_Alltoall reported failure (MPI_SUCCESS is 0, so gstatus == 0 means
// all ranks succeeded).
158 MPI_Allreduce( &status, &gstatus, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
161 printf("all_to_all returned %d\n",gstatus);fflush(stdout);
// NOTE(review): free(sb)/free(rb) and MPI_Finalize() presumably sit on the
// unshown lines 162-171 before this return -- confirm against the full file.
172 return(EXIT_SUCCESS);