- int i, j, extent, myrank, p, n[2], m[2];
- int lasti, lastj;
- int *sendcounts, *recvcounts;
- int *sdispls, *rdispls;
- MPI_Datatype xtype[2][2], stype[2][2], *sendtypes, *recvtypes;
-
- MTestPrintfMsg( 2, "M = %d, N = %d\n", M, N );
-
- /* compute parameters */
- MPI_Comm_size(comm, &p);
- MPI_Comm_rank(comm, &myrank);
- extent = sizeof(float);
-
- /* allocate arrays */
- sendcounts = (int *)malloc(p*sizeof(int));
- recvcounts = (int *)malloc(p*sizeof(int));
- sdispls = (int *)malloc(p*sizeof(int));
- rdispls = (int *)malloc(p*sizeof(int));
- sendtypes = (MPI_Datatype *)malloc(p*sizeof(MPI_Datatype));
- recvtypes = (MPI_Datatype *)malloc(p*sizeof(MPI_Datatype));
-
- /* compute block sizes */
- m[0] = M/p;
- m[1] = M - (p-1)*(M/p);
- n[0] = N/p;
- n[1] = N - (p-1)*(N/p);
-
- /* compute types */
- for (i=0; i <= 1; i++)
- for (j=0; j <= 1; j++) {
- xtype[i][j] = transpose_type(N, m[i], n[j], MPI_FLOAT);
- stype[i][j] = submatrix_type(M, m[i], n[j], MPI_FLOAT);
- }
-
- /* prepare collective operation arguments */
- lasti = myrank == p-1;
- for (j=0; j < p; j++) {
- lastj = j == p-1;
- sendcounts[j] = 1;
- sdispls[j] = j*n[0]*extent;
- sendtypes[j] = xtype[lasti][lastj];
- recvcounts[j] = 1;
- rdispls[j] = j*m[0]*extent;
- recvtypes[j] = stype[lastj][lasti];
- }
-
- /* communicate */
- MTestPrintfMsg( 2, "Begin Alltoallw...\n" );
- /* -- Note that the book incorrectly uses &localA and &localB
- as arguments to MPI_Alltoallw */
- MPI_Alltoallw(localA, sendcounts, sdispls, sendtypes,
- localB, recvcounts, rdispls, recvtypes, comm);
- MTestPrintfMsg( 2, "Done with Alltoallw\n" );
-
- /* Free buffers */
- free( sendcounts );
- free( recvcounts );
- free( sdispls );
- free( rdispls );
- free( sendtypes );
- free( recvtypes );
-
- /* Free datatypes */
- for (i=0; i <= 1; i++)
- for (j=0; j <= 1; j++) {
- MPI_Type_free( &xtype[i][j] );
- MPI_Type_free( &stype[i][j] );
- }
+ int i, j, extent, myrank, p, n[2], m[2];
+ int lasti, lastj;
+ int *sendcounts, *recvcounts;
+ int *sdispls, *rdispls;
+ MPI_Datatype xtype[2][2], stype[2][2], *sendtypes, *recvtypes;
+
+ MTestPrintfMsg(2, "M = %d, N = %d\n", M, N);
+
+ /* compute parameters */
+ MPI_Comm_size(comm, &p);
+ MPI_Comm_rank(comm, &myrank);
+ extent = sizeof(float);
+
+ /* allocate arrays */
+ sendcounts = (int *) malloc(p * sizeof(int));
+ recvcounts = (int *) malloc(p * sizeof(int));
+ sdispls = (int *) malloc(p * sizeof(int));
+ rdispls = (int *) malloc(p * sizeof(int));
+ sendtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype));
+ recvtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype));
+
+ /* compute block sizes */
+ m[0] = M / p;
+ m[1] = M - (p - 1) * (M / p);
+ n[0] = N / p;
+ n[1] = N - (p - 1) * (N / p);
+
+ /* compute types */
+ for (i = 0; i <= 1; i++)
+ for (j = 0; j <= 1; j++) {
+ xtype[i][j] = transpose_type(N, m[i], n[j], MPI_FLOAT);
+ stype[i][j] = submatrix_type(M, m[i], n[j], MPI_FLOAT);
+ }
+
+ /* prepare collective operation arguments */
+ lasti = myrank == p - 1;
+ for (j = 0; j < p; j++) {
+ lastj = j == p - 1;
+ sendcounts[j] = 1;
+ sdispls[j] = j * n[0] * extent;
+ sendtypes[j] = xtype[lasti][lastj];
+ recvcounts[j] = 1;
+ rdispls[j] = j * m[0] * extent;
+ recvtypes[j] = stype[lastj][lasti];
+ }
+
+ /* communicate */
+ MTestPrintfMsg(2, "Begin Alltoallw...\n");
+ /* -- Note that the book incorrectly uses &localA and &localB
+ * as arguments to MPI_Alltoallw */
+ MPI_Alltoallw(localA, sendcounts, sdispls, sendtypes,
+ localB, recvcounts, rdispls, recvtypes, comm);
+ MTestPrintfMsg(2, "Done with Alltoallw\n");
+
+ /* Free buffers */
+ free(sendcounts);
+ free(recvcounts);
+ free(sdispls);
+ free(rdispls);
+ free(sendtypes);
+ free(recvtypes);
+
+ /* Free datatypes */
+ for (i = 0; i <= 1; i++)
+ for (j = 0; j <= 1; j++) {
+ MPI_Type_free(&xtype[i][j]);
+ MPI_Type_free(&stype[i][j]);
+ }