teshsuite/smpi/mpich3-test/coll/alltoallw1.c

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
   2 /*
   3  *  Changes to this example
   4  *  (C) 2001 by Argonne National Laboratory.
   5  *      See COPYRIGHT in top-level directory.
   6  */
   7
   8 /*
   9  * This example is taken from MPI-The complete reference, Vol 1,
  10  * pages 222-224.
  11  *
  12  * Lines after the "--CUT HERE--" were added to make this into a complete
  13  * test program.
  14  */
  15
  16 /* Specify the maximum number of errors to report. */
  17 #define MAX_ERRORS 10
  18
  19 #include "mpi.h"
  20 #include "mpitest.h"
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23
  24 #define MAX_SIZE 64
  25
  26 MPI_Datatype transpose_type(int M, int m, int n, MPI_Datatype type);
  27 MPI_Datatype submatrix_type(int N, int m, int n, MPI_Datatype type);
  28 void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm);
  29 void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm)
  30 /* transpose MxN matrix A that is block distributed (1-D) on
  31    processes of comm onto block distributed matrix B  */
  32 {
  33     int i, j, extent, myrank, p, n[2], m[2];
  34     int lasti, lastj;
  35     int *sendcounts, *recvcounts;
  36     int *sdispls, *rdispls;
  37     MPI_Datatype xtype[2][2], stype[2][2], *sendtypes, *recvtypes;
  38
  39     MTestPrintfMsg(2, "M = %d, N = %d\n", M, N);
  40
  41     /* compute parameters */
  42     MPI_Comm_size(comm, &p);
  43     MPI_Comm_rank(comm, &myrank);
  44     extent = sizeof(float);
  45
  46     /* allocate arrays */
  47     sendcounts = (int *) malloc(p * sizeof(int));
  48     recvcounts = (int *) malloc(p * sizeof(int));
  49     sdispls = (int *) malloc(p * sizeof(int));
  50     rdispls = (int *) malloc(p * sizeof(int));
  51     sendtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype));
  52     recvtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype));
  53
  54     /* compute block sizes */
  55     m[0] = M / p;
  56     m[1] = M - (p - 1) * (M / p);
  57     n[0] = N / p;
  58     n[1] = N - (p - 1) * (N / p);
  59
  60     /* compute types */
  61     for (i = 0; i <= 1; i++)
  62         for (j = 0; j <= 1; j++) {
  63             xtype[i][j] = transpose_type(N, m[i], n[j], MPI_FLOAT);
  64             stype[i][j] = submatrix_type(M, m[i], n[j], MPI_FLOAT);
  65         }
  66
  67     /* prepare collective operation arguments */
  68     lasti = myrank == p - 1;
  69     for (j = 0; j < p; j++) {
  70         lastj = j == p - 1;
  71         sendcounts[j] = 1;
  72         sdispls[j] = j * n[0] * extent;
  73         sendtypes[j] = xtype[lasti][lastj];
  74         recvcounts[j] = 1;
  75         rdispls[j] = j * m[0] * extent;
  76         recvtypes[j] = stype[lastj][lasti];
  77     }
  78
  79     /* communicate */
  80     MTestPrintfMsg(2, "Begin Alltoallw...\n");
  81     /* -- Note that the book incorrectly uses &localA and &localB
  82      * as arguments to MPI_Alltoallw */
  83     MPI_Alltoallw(localA, sendcounts, sdispls, sendtypes,
  84                   localB, recvcounts, rdispls, recvtypes, comm);
  85     MTestPrintfMsg(2, "Done with Alltoallw\n");
  86
  87     /* Free buffers */
  88     free(sendcounts);
  89     free(recvcounts);
  90     free(sdispls);
  91     free(rdispls);
  92     free(sendtypes);
  93     free(recvtypes);
  94
  95     /* Free datatypes */
  96     for (i = 0; i <= 1; i++)
  97         for (j = 0; j <= 1; j++) {
  98             MPI_Type_free(&xtype[i][j]);
  99             MPI_Type_free(&stype[i][j]);
 100         }
 101 }
 102
 103
 104 /* Define an n x m submatrix in a n x M local matrix (this is the
 105    destination in the transpose matrix */
 106 MPI_Datatype submatrix_type(int M, int m, int n, MPI_Datatype type)
 107 /* computes a datatype for an mxn submatrix within an MxN matrix
 108    with entries of type type */
 109 {
 110     /* MPI_Datatype subrow; */
 111     MPI_Datatype submatrix;
 112
 113     /* The book, MPI: The Complete Reference, has the wrong type constructor
 114      * here.  Since the stride in the vector type is relative to the input
 115      * type, the stride in the book's code is n times as long as is intended.
 116      * Since n may not exactly divide N, it is better to simply use the
 117      * blocklength argument in Type_vector */
 118     /*
 119      * MPI_Type_contiguous(n, type, &subrow);
 120      * MPI_Type_vector(m, 1, N, subrow, &submatrix);
 121      */
 122     MPI_Type_vector(n, m, M, type, &submatrix);
 123     MPI_Type_commit(&submatrix);
 124
 125     /* Add a consistency test: the size of submatrix should be
 126      * n * m * sizeof(type) and the extent should be ((n-1)*M+m) * sizeof(type) */
 127     {
 128         int tsize;
 129         MPI_Aint textent, lb;
 130         MPI_Type_size(type, &tsize);
 131         MPI_Type_get_extent(submatrix, &lb, &textent);
 132
 133         if (textent != tsize * (M * (n - 1) + m)) {
 134             fprintf(stderr, "Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",
 135                     (long) textent, (long) (tsize * (M * (n - 1) + m)), M, n, m);
 136         }
 137     }
 138     return (submatrix);
 139 }
 140
 141 /* Extract an m x n submatrix within an m x N matrix and transpose it.
 142    Assume storage by rows; the defined datatype accesses by columns */
 143 MPI_Datatype transpose_type(int N, int m, int n, MPI_Datatype type)
 144 /* computes a datatype for the transpose of an mxn matrix
 145    with entries of type type */
 146 {
 147     MPI_Datatype subrow, subrow1, submatrix;
 148     MPI_Aint lb, extent;
 149
 150     MPI_Type_vector(m, 1, N, type, &subrow);
 151     MPI_Type_get_extent(type, &lb, &extent);
 152     MPI_Type_create_resized(subrow, 0, extent, &subrow1);
 153     MPI_Type_contiguous(n, subrow1, &submatrix);
 154     MPI_Type_commit(&submatrix);
 155     MPI_Type_free(&subrow);
 156     MPI_Type_free(&subrow1);
 157
 158     /* Add a consistency test: the size of submatrix should be
 159      * n * m * sizeof(type) and the extent should be ((m-1)*N+n) * sizeof(type) */
 160     {
 161         int tsize;
 162         MPI_Aint textent, llb;
 163         MPI_Type_size(type, &tsize);
 164         MPI_Type_get_true_extent(submatrix, &llb, &textent);
 165
 166 /*        if (textent != tsize * (N * (m - 1) + n)) {*/
 167 /*            fprintf(stderr, "Transpose Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",*/
 168 /*                    (long) textent, (long) (tsize * (N * (m - 1) + n)), N, n, m);*/
 169 /*        }*/
 170     }
 171
 172     return (submatrix);
 173 }
 174
 175 /* -- CUT HERE -- */
 176
 177 int main(int argc, char *argv[])
 178 {
 179     int gM, gN, lm, lmlast, ln, lnlast, i, j, errs = 0;
 180     int size, rank;
 181     float *localA, *localB;
 182     MPI_Comm comm;
 183
 184     MTest_Init(&argc, &argv);
 185     comm = MPI_COMM_WORLD;
 186
 187     MPI_Comm_size(comm, &size);
 188     MPI_Comm_rank(comm, &rank);
 189
 190     gM = 20;
 191     gN = 30;
 192
 193     /* Each block is lm x ln in size, except for the last process,
 194      * which has lmlast x lnlast */
 195     lm = gM / size;
 196     lmlast = gM - (size - 1) * lm;
 197     ln = gN / size;
 198     lnlast = gN - (size - 1) * ln;
 199
 200     /* Create the local matrices.
 201      * Initialize the input matrix so that the entries are
 202      * consequtive integers, by row, starting at 0.
 203      */
 204     if (rank == size - 1) {
 205         localA = (float *) malloc(gN * lmlast * sizeof(float));
 206         localB = (float *) malloc(gM * lnlast * sizeof(float));
 207         for (i = 0; i < lmlast; i++) {
 208             for (j = 0; j < gN; j++) {
 209                 localA[i * gN + j] = (float) (i * gN + j + rank * gN * lm);
 210             }
 211         }
 212
 213     }
 214     else {
 215         localA = (float *) malloc(gN * lm * sizeof(float));
 216         localB = (float *) malloc(gM * ln * sizeof(float));
 217         for (i = 0; i < lm; i++) {
 218             for (j = 0; j < gN; j++) {
 219                 localA[i * gN + j] = (float) (i * gN + j + rank * gN * lm);
 220             }
 221         }
 222     }
 223
 224     MTestPrintfMsg(2, "Allocated local arrays\n");
 225     /* Transpose */
 226     Transpose(localA, localB, gM, gN, comm);
 227
 228     /* check the transposed matrix
 229      * In the global matrix, the transpose has consequtive integers,
 230      * organized by columns.
 231      */
 232     if (rank == size - 1) {
 233         for (i = 0; i < lnlast; i++) {
 234             for (j = 0; j < gM; j++) {
 235                 int expected = i + gN * j + rank * ln;
 236                 if ((int) localB[i * gM + j] != expected) {
 237                     if (errs < MAX_ERRORS)
 238                         printf("Found %d but expected %d\n", (int) localB[i * gM + j], expected);
 239                     errs++;
 240                 }
 241             }
 242         }
 243
 244     }
 245     else {
 246         for (i = 0; i < ln; i++) {
 247             for (j = 0; j < gM; j++) {
 248                 int expected = i + gN * j + rank * ln;
 249                 if ((int) localB[i * gM + j] != expected) {
 250                     if (errs < MAX_ERRORS)
 251                         printf("Found %d but expected %d\n", (int) localB[i * gM + j], expected);
 252                     errs++;
 253                 }
 254             }
 255         }
 256     }
 257
 258     /* Free storage */
 259     free(localA);
 260     free(localB);
 261
 262     MTest_Finalize(errs);
 263
 264     MPI_Finalize();
 265
 266     return 0;
 267 }