#include "colls.h"
#include <math.h>
+
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
"Logging specific to SMPI collectives");
* Author: Ahmad Faraj
****************************************************************************/
-int alltoall_check_is_2dmesh(int num, int * i, int * j)
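+/* Decide whether num ranks can be laid out as a 2-D mesh of i x j with
+   i <= j (e.g. 12 -> 3 x 4); returns 1 and fills *i, *j on success,
+   0 otherwise. */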
+int alltoall_check_is_2dmesh(int num, int *i, int *j)
{
int x, max = num / 2;
- x = sqrt(num);
-
- while (x <= max)
- {
- if ((num % x) == 0)
- {
- * i = x;
- * j = num / x;
-
- if (* i > * j)
- {
- x = * i;
- * i = * j;
- * j = x;
- }
-
- return 1;
- }
- x++;
+ x = sqrt(num);
+
+ while (x <= max) {
+ if ((num % x) == 0) {
+ *i = x;
+ *j = num / x;
+
+ if (*i > *j) {
+ x = *i;
+ *i = *j;
+ *j = x;
+ }
+
+ return 1;
}
+ x++;
+ }
return 0;
}
-int
-smpi_coll_tuned_alltoall_2dmesh(void * send_buff, int send_count, MPI_Datatype send_type,
- void * recv_buff, int recv_count, MPI_Datatype recv_type,
- MPI_Comm comm)
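+/* 2-D mesh all-to-all: ranks are viewed as an X x Y mesh; every rank first
+   exchanges its whole send buffer with the other ranks in its mesh row,
+   then forwards the blocks gathered there along its mesh column. */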
+int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
+ MPI_Datatype send_type,
+ void *recv_buff, int recv_count,
+ MPI_Datatype recv_type,
+ MPI_Comm comm)
{
- MPI_Status * statuses, s;
- MPI_Request * reqs, * req_ptr;;
+ MPI_Status *statuses, s;
+ MPI_Request *reqs, *req_ptr;
MPI_Aint extent;
- char * tmp_buff1, * tmp_buff2;
+ char *tmp_buff1, *tmp_buff2;
int i, j, src, dst, rank, num_procs, count, num_reqs;
int rows, cols, my_row, my_col, X, Y, send_offset, recv_offset;
int two_dsize, my_row_base, my_col_base, src_row_base, block_size;
int tag = 1, failure = 0, success = 1;
-
+
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &num_procs);
MPI_Type_extent(send_type, &extent);
my_col_base = rank % Y;
block_size = extent * send_count;
-
- tmp_buff1 =(char *) malloc(block_size * num_procs * Y);
- if (!tmp_buff1)
- {
- XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
- MPI_Finalize();
- exit(failure);
- }
-
- tmp_buff2 =(char *) malloc(block_size * Y);
- if (!tmp_buff2)
- {
- XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
- MPI_Finalize();
- exit(failure);
- }
-
+
+ tmp_buff1 = (char *) malloc(block_size * num_procs * Y);
+ if (!tmp_buff1) {
+ XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ tmp_buff2 = (char *) malloc(block_size * Y);
+ if (!tmp_buff2) {
+ XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
+ MPI_Finalize();
+ exit(failure);
+ }
+
num_reqs = X;
- if (Y > X) num_reqs = Y;
-
- statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
- reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
- if (!reqs)
- {
- XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
- MPI_Finalize();
- exit(failure);
- }
-
+ if (Y > X)
+ num_reqs = Y;
+
+ statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
+ reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
+ if (!reqs) {
+ XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
+ MPI_Finalize();
+ exit(failure);
+ }
+
req_ptr = reqs;
send_offset = recv_offset = (rank % Y) * block_size * num_procs;
count = send_count * num_procs;
-
- for (i = 0; i < Y; i++)
- {
- src = i + my_row_base;
- if (src == rank)
- continue;
-
- recv_offset = (src % Y) * block_size * num_procs;
- MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
- req_ptr++);
- }
-
- for (i = 0; i < Y; i++)
- {
- dst = i + my_row_base;
- if (dst == rank)
- continue;
- MPI_Send(send_buff, count, send_type, dst, tag, comm);
- }
-
+
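+ /* Row phase: swap complete send buffers with the other Y - 1 ranks in
+ this mesh row, collecting them in tmp_buff1. */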
+ for (i = 0; i < Y; i++) {
+ src = i + my_row_base;
+ if (src == rank)
+ continue;
+
+ recv_offset = (src % Y) * block_size * num_procs;
+ MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
+ req_ptr++);
+ }
+
+ for (i = 0; i < Y; i++) {
+ dst = i + my_row_base;
+ if (dst == rank)
+ continue;
+ MPI_Send(send_buff, count, send_type, dst, tag, comm);
+ }
+
MPI_Waitall(Y - 1, reqs, statuses);
req_ptr = reqs;
-
- for (i = 0; i < Y; i++)
- {
- send_offset = (rank * block_size) + (i * block_size * num_procs);
- recv_offset = (my_row_base * block_size) + (i * block_size);
-
- if (i + my_row_base == rank)
- MPI_Sendrecv (send_buff + recv_offset, send_count, send_type,
- rank, tag, recv_buff + recv_offset, recv_count,
- recv_type, rank, tag, comm, &s);
-
- else
- MPI_Sendrecv (tmp_buff1 + send_offset, send_count, send_type,
- rank, tag,
- recv_buff + recv_offset, recv_count, recv_type,
- rank, tag, comm, &s);
- }
-
- for (i = 0; i < X; i++)
- {
- src = (i * Y + my_col_base);
- if (src == rank)
- continue;
- src_row_base = (src / Y) * Y;
- MPI_Irecv(recv_buff + src_row_base * block_size, recv_count * Y,
- recv_type, src, tag, comm, req_ptr++);
+ for (i = 0; i < Y; i++) {
+ send_offset = (rank * block_size) + (i * block_size * num_procs);
+ recv_offset = (my_row_base * block_size) + (i * block_size);
+ if (i + my_row_base == rank)
+ MPI_Sendrecv(send_buff + recv_offset, send_count, send_type,
+ rank, tag, recv_buff + recv_offset, recv_count,
+ recv_type, rank, tag, comm, &s);
+
+ else
+ MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ rank, tag,
+ recv_buff + recv_offset, recv_count, recv_type,
+ rank, tag, comm, &s);
+ }
+
+
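+ /* Column phase: receive whole rows from the other ranks in this mesh
+ column directly into recv_buff, and send each of them the Y blocks
+ packed into tmp_buff2 below. */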
+ for (i = 0; i < X; i++) {
+ src = (i * Y + my_col_base);
+ if (src == rank)
+ continue;
+ src_row_base = (src / Y) * Y;
+
+ MPI_Irecv(recv_buff + src_row_base * block_size, recv_count * Y,
+ recv_type, src, tag, comm, req_ptr++);
}
-
- for (i = 0; i < X; i++)
- {
- dst = (i * Y + my_col_base);
- if (dst == rank)
- continue;
-
- recv_offset = 0;
- for (j = 0; j < Y; j++)
- {
- send_offset = (dst + j * num_procs) * block_size;
-
- if (j + my_row_base == rank)
- MPI_Sendrecv (send_buff + dst * block_size, send_count, send_type,
- rank, tag,
- tmp_buff2 + recv_offset, recv_count, recv_type,
- rank, tag, comm, &s);
- else
- MPI_Sendrecv (tmp_buff1 + send_offset, send_count, send_type,
- rank, tag,
- tmp_buff2 + recv_offset, recv_count, recv_type,
- rank, tag, comm, &s);
-
- recv_offset += block_size;
- }
-
- MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
+
+ for (i = 0; i < X; i++) {
+ dst = (i * Y + my_col_base);
+ if (dst == rank)
+ continue;
+
+ recv_offset = 0;
+ for (j = 0; j < Y; j++) {
+ send_offset = (dst + j * num_procs) * block_size;
+
+ if (j + my_row_base == rank)
+ MPI_Sendrecv(send_buff + dst * block_size, send_count, send_type,
+ rank, tag,
+ tmp_buff2 + recv_offset, recv_count, recv_type,
+ rank, tag, comm, &s);
+ else
+ MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ rank, tag,
+ tmp_buff2 + recv_offset, recv_count, recv_type,
+ rank, tag, comm, &s);
+
+ recv_offset += block_size;
}
+
+ MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
+ }
MPI_Waitall(X - 1, reqs, statuses);
free(reqs);
free(statuses);
free(tmp_buff1);
- free(tmp_buff2);
+ free(tmp_buff2);
return success;
}
* Author: Ahmad Faraj
****************************************************************************/
-int alltoall_check_is_3dmesh(int num, int * i, int * j, int * k)
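+/* Decide whether num ranks form a 3-D mesh of i x j x k with i == j
+   (e.g. 12 -> 2 x 2 x 3); returns 1 and fills *i, *j, *k on success,
+   0 otherwise. */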
+int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k)
{
int x, max = num / 3;
- x = cbrt(num);
- * i = * j = * k = 0;
- while (x <= max)
- {
- if ((num % (x * x)) == 0)
- {
- * i = * j = x;
- * k = num / (x * x);
- return 1;
- }
- x++;
+ x = cbrt(num);
+ *i = *j = *k = 0;
+ while (x <= max) {
+ if ((num % (x * x)) == 0) {
+ *i = *j = x;
+ *k = num / (x * x);
+ return 1;
}
+ x++;
+ }
return 0;
}
-int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Datatype send_type,
- void * recv_buff, int recv_count, MPI_Datatype recv_type,
- MPI_Comm comm)
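+/* 3-D mesh all-to-all: ranks are viewed as an X x Y x Z mesh; data is
+   exchanged first within each mesh row, then within each column, and
+   finally between the X-Y planes along the Z dimension. */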
+int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count,
+ MPI_Datatype send_type,
+ void *recv_buff, int recv_count,
+ MPI_Datatype recv_type,
+ MPI_Comm comm)
{
- MPI_Request * reqs, * req_ptr;
+ MPI_Request *reqs, *req_ptr;
MPI_Aint extent;
- MPI_Status status, * statuses;
+ MPI_Status status, *statuses;
int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count;
int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1;
- char * tmp_buff1, * tmp_buff2;
+ char *tmp_buff1, *tmp_buff2;
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &num_procs);
MPI_Type_extent(send_type, &extent);
if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
- return failure;
+ return failure;
num_reqs = X;
- if (Y > X) num_reqs = Y;
- if (Z > Y) num_reqs = Z;
+ if (Y > X)
+ num_reqs = Y;
+ if (Z > Y)
+ num_reqs = Z;
two_dsize = X * Y;
- my_z = rank / two_dsize;
+ my_z = rank / two_dsize;
my_row_base = (rank / X) * X;
my_col_base = (rank % Y) + (my_z * two_dsize);
block_size = extent * send_count;
- tmp_buff1 =(char *) malloc(block_size * num_procs * two_dsize);
- if (!tmp_buff1)
- {
- printf("alltoall-3Dmesh:97: cannot allocate memory\n");
- MPI_Finalize();
- exit(failure);
- }
-
- tmp_buff2 =(char *) malloc(block_size * two_dsize);
- if (!tmp_buff2)
- {
- printf("alltoall-3Dmesh:105: cannot allocate memory\n");
- MPI_Finalize();
- exit(failure);
- }
-
- statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
- reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
- if (!reqs)
- {
- printf("alltoall-3Dmesh:113: cannot allocate memory\n");
- MPI_Finalize();
- exit(failure);
- }
-
+ tmp_buff1 = (char *) malloc(block_size * num_procs * two_dsize);
+ if (!tmp_buff1) {
+ printf("alltoall-3Dmesh:97: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ tmp_buff2 = (char *) malloc(block_size * two_dsize);
+ if (!tmp_buff2) {
+ printf("alltoall-3Dmesh:105: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
+ reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
+ if (!reqs) {
+ printf("alltoall-3Dmesh:113: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
req_ptr = reqs;
-
+
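+ /* Copy the local send buffer into the scratch buffer, then exchange full
+ buffers with the other ranks in this mesh row. */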
send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;
- MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
- tmp_buff1 + recv_offset, num_procs * recv_count,
- recv_type, rank, tag, comm, &status);
+ MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
+ tmp_buff1 + recv_offset, num_procs * recv_count,
+ recv_type, rank, tag, comm, &status);
count = send_count * num_procs;
- for (i = 0; i < Y; i++)
- {
- src = i + my_row_base;
- if (src == rank) continue;
- recv_offset = (src % two_dsize) * block_size * num_procs;
- MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
- req_ptr++);
- }
+ for (i = 0; i < Y; i++) {
+ src = i + my_row_base;
+ if (src == rank)
+ continue;
+ recv_offset = (src % two_dsize) * block_size * num_procs;
+ MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
+ req_ptr++);
+ }
- for (i = 0; i < Y; i++)
- {
- dst = i + my_row_base;
- if (dst == rank) continue;
- MPI_Send(send_buff, count, send_type, dst, tag, comm);
- }
+ for (i = 0; i < Y; i++) {
+ dst = i + my_row_base;
+ if (dst == rank)
+ continue;
+ MPI_Send(send_buff, count, send_type, dst, tag, comm);
+ }
MPI_Waitall(Y - 1, reqs, statuses);
req_ptr = reqs;
-
-
- for (i = 0; i < X; i++)
- {
- src = (i * Y + my_col_base);
- if (src == rank) continue;
-
- src_row_base = (src / X) * X;
-
- recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
- MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
- recv_type, src, tag, comm, req_ptr++);
- }
- send_offset = (my_row_base % two_dsize) * block_size * num_procs;
- for (i = 0; i < X; i++)
- {
- dst = (i * Y + my_col_base);
- if (dst == rank) continue;
- MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
- dst, tag, comm);
- }
-
+
+ for (i = 0; i < X; i++) {
+ src = (i * Y + my_col_base);
+ if (src == rank)
+ continue;
+
+ src_row_base = (src / X) * X;
+
+ recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
+ MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
+ recv_type, src, tag, comm, req_ptr++);
+ }
+
+ send_offset = (my_row_base % two_dsize) * block_size * num_procs;
+ for (i = 0; i < X; i++) {
+ dst = (i * Y + my_col_base);
+ if (dst == rank)
+ continue;
+ MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
+ dst, tag, comm);
+ }
+
MPI_Waitall(X - 1, reqs, statuses);
req_ptr = reqs;
- for (i = 0; i < two_dsize; i++)
- {
- send_offset = (rank * block_size) + (i * block_size * num_procs);
- recv_offset = (my_z_base * block_size) + (i * block_size);
- MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
- recv_buff + recv_offset, recv_count, recv_type, rank, tag,
- comm, &status);
- }
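+ /* Copy the blocks addressed to this rank by the members of its own X-Y
+ plane from the scratch buffer into recv_buff. */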
+ for (i = 0; i < two_dsize; i++) {
+ send_offset = (rank * block_size) + (i * block_size * num_procs);
+ recv_offset = (my_z_base * block_size) + (i * block_size);
+ MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
+ recv_buff + recv_offset, recv_count, recv_type, rank, tag,
+ comm, &status);
+ }
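+ /* Z phase: exchange packed blocks with the corresponding ranks in the
+ other X-Y planes. */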
- for (i = 1; i < Z; i++)
- {
- src = (rank + i * two_dsize) % num_procs;
- src_z_base = (src / two_dsize) * two_dsize;
+ for (i = 1; i < Z; i++) {
+ src = (rank + i * two_dsize) % num_procs;
+ src_z_base = (src / two_dsize) * two_dsize;
- recv_offset = (src_z_base * block_size);
+ recv_offset = (src_z_base * block_size);
- MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type,
- src, tag, comm, req_ptr++);
+ MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type,
+ src, tag, comm, req_ptr++);
}
- for (i = 1; i < Z; i++)
- {
- dst = (rank + i * two_dsize) % num_procs;
-
- recv_offset = 0;
- for (j = 0; j < two_dsize; j++)
- {
- send_offset = (dst + j * num_procs) * block_size;
- MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
- rank, tag, tmp_buff2 + recv_offset, recv_count,
- recv_type, rank, tag, comm, &status);
-
- recv_offset += block_size;
- }
-
- MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
-
+ for (i = 1; i < Z; i++) {
+ dst = (rank + i * two_dsize) % num_procs;
+
+ recv_offset = 0;
+ for (j = 0; j < two_dsize; j++) {
+ send_offset = (dst + j * num_procs) * block_size;
+ MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ rank, tag, tmp_buff2 + recv_offset, recv_count,
+ recv_type, rank, tag, comm, &status);
+
+ recv_offset += block_size;
}
-
+
+ MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
+
+ }
+
MPI_Waitall(Z - 1, reqs, statuses);
free(reqs);
#include "smpi/mpi.h"
+
/*****************************************************************************
* Function: alltoall_pair
****************************************************************************/
/*
-int
-alltoall_pair(void * send_buff, int send_count, MPI_Datatype send_type,
- void * recv_buff, int recv_count, MPI_Datatype recv_type,
- MPI_Comm comm)
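+   One-sided variant: each rank puts its blocks straight into the window
+   its partner exposes over recv_buff with MPI_Put.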
+int alltoall_pair(void *send_buff, int send_count, MPI_Datatype send_type,
+ void *recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
{
MPI_Aint send_chunk, recv_chunk;
int i, src, dst, rank, num_procs;
int tag = 1, success = 1, failure = 0, pof2 = 1;
- char * send_ptr = (char *) send_buff;
- char * recv_ptr = (char *) recv_buff;
-
+ char *send_ptr = (char *) send_buff;
+ char *recv_ptr = (char *) recv_buff;
+
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &num_procs);
MPI_Type_extent(send_type, &send_chunk);
MPI_Type_extent(recv_type, &recv_chunk);
- MPI_Win_create(recv_buff, num_procs*recv_chunk*send_count,recv_chunk,0,
- comm, &win);
+ MPI_Win_create(recv_buff, num_procs * recv_chunk * send_count, recv_chunk, 0,
+ comm, &win);
send_chunk *= send_count;
- recv_chunk *= recv_count;
+ recv_chunk *= recv_count;
MPI_Win_fence(assert, win);
- for (i = 0; i < num_procs; i++)
- {
- src = dst = rank ^ i;
- MPI_Put(send_ptr + dst * send_chunk, send_count, send_type, dst,
- rank*send_chunk, send_count, send_type, win);
- }
- MPI_Win_fence (assert, win);
+ for (i = 0; i < num_procs; i++) {
+ src = dst = rank ^ i;
+ MPI_Put(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ rank * send_chunk, send_count, send_type, win);
+ }
+ MPI_Win_fence(assert, win);
MPI_Win_free(&win);
return 0;
}
*/
-int
-smpi_coll_tuned_alltoall_pair(void * send_buff, int send_count, MPI_Datatype send_type,
- void * recv_buff, int recv_count, MPI_Datatype recv_type,
- MPI_Comm comm)
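+/* Pairwise exchange: in step i every rank does a blocking sendrecv of one
+   block with partner rank ^ i, so the ranks pair off at each step; the XOR
+   pairing assumes the number of processes is a power of two. */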
+int smpi_coll_tuned_alltoall_pair(void *send_buff, int send_count,
+ MPI_Datatype send_type,
+ void *recv_buff, int recv_count,
+ MPI_Datatype recv_type,
+ MPI_Comm comm)
{
MPI_Aint send_chunk, recv_chunk;
int i, src, dst, rank, num_procs;
int tag = 1, success = 1;
- char * send_ptr = (char *) send_buff;
- char * recv_ptr = (char *) recv_buff;
-
+ char *send_ptr = (char *) send_buff;
+ char *recv_ptr = (char *) recv_buff;
+
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &num_procs);
MPI_Type_extent(send_type, &send_chunk);
MPI_Type_extent(recv_type, &recv_chunk);
send_chunk *= send_count;
- recv_chunk *= recv_count;
+ recv_chunk *= recv_count;
- for (i = 0; i < num_procs; i++)
- {
- src = dst = rank ^ i;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
- tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
- src, tag, comm, &s);
- }
+ for (i = 0; i < num_procs; i++) {
+ src = dst = rank ^ i;
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
return success;
}
-
#include "colls.h"
+
/*****************************************************************************
* Function: alltoall_rdb
* Author: MPICH / slightly modified by Ahmad Faraj.
****************************************************************************/
-int
-smpi_coll_tuned_alltoall_rdb(void * send_buff, int send_count, MPI_Datatype send_type,
- void * recv_buff, int recv_count, MPI_Datatype recv_type,
- MPI_Comm comm)
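+/* Recursive-doubling all-to-all (MPICH style): data is accumulated in a
+   scratch buffer large enough for num_procs * num_procs blocks and
+   exchanged with partner rank ^ mask while mask doubles each step; the
+   result is then copied from the scratch buffer into recv_buff. */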
+int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count,
+ MPI_Datatype send_type,
+ void *recv_buff, int recv_count,
+ MPI_Datatype recv_type,
+ MPI_Comm comm)
{
/* MPI variables */
MPI_Status status;
int last_recv_count, tmp_mask, tree_root, num_procs_completed;
int tag = 1, mask = 1, success = 1, failure = 0, c = 0, i = 0;
- char * tmp_buff;
- char * send_ptr = (char *) send_buff;
- char * recv_ptr = (char *) recv_buff;
+ char *tmp_buff;
+ char *send_ptr = (char *) send_buff;
+ char *recv_ptr = (char *) recv_buff;
MPI_Comm_size(comm, &num_procs);
MPI_Comm_rank(comm, &rank);
send_increment *= (send_count * num_procs);
recv_increment *= (recv_count * num_procs);
-
+
max_size = num_procs * recv_increment;
-
- tmp_buff = (char * ) malloc(max_size);
- if (!tmp_buff)
- {
- printf("alltoall-rdb:56: cannot allocate memory\n");
- MPI_Finalize();
- exit(failure);
- }
+
+ tmp_buff = (char *) malloc(max_size);
+ if (!tmp_buff) {
+ printf("alltoall-rdb:56: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
curr_size = send_count * num_procs;
 MPI_Sendrecv(send_ptr, curr_size, send_type, rank, tag,
 tmp_buff + (rank * recv_increment),
curr_size, recv_type, rank, tag, comm, &status);
- while (mask < num_procs)
- {
- dst = rank ^ mask;
- dst_tree_root = dst >> i;
- dst_tree_root <<= i;
- rank_tree_root = rank >> i;
- rank_tree_root <<= i;
- send_offset = rank_tree_root * send_increment;
- recv_offset = dst_tree_root * recv_increment;
-
- if (dst < num_procs)
- {
- MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag,
- tmp_buff + recv_offset, mask * recv_count * num_procs,
- recv_type, dst, tag, comm, &status);
-
- MPI_Get_count(&status, recv_type, &last_recv_count);
- curr_size += last_recv_count;
- }
-
-
- if (dst_tree_root + mask > num_procs)
- {
-
- num_procs_completed = num_procs - rank_tree_root - mask;
- /* num_procs_completed is the number of processes in this
- subtree that have all the data. Send data to others
- in a tree fashion. First find root of current tree
- that is being divided into two. k is the number of
- least-significant bits in this process's rank that
- must be zeroed out to find the rank of the root */
-
- j = mask;
- k = 0;
- while (j)
- {
- j >>= 1;
- k++;
- }
- k--;
-
- tmp_mask = mask >> 1;
-
- while (tmp_mask)
- {
- dst = rank ^ tmp_mask;
-
- tree_root = rank >> k;
- tree_root <<= k;
-
- /* send only if this proc has data and destination
- doesn't have data. at any step, multiple processes
- can send if they have the data */
-
- if ((dst > rank)
- && (rank < tree_root + num_procs_completed)
- && (dst >= tree_root + num_procs_completed))
- {
- MPI_Send(tmp_buff + dst_tree_root * send_increment,
- last_recv_count, send_type, dst, tag, comm);
-
- }
-
- /* recv only if this proc. doesn't have data and sender
- has data */
-
- else if ((dst < rank)
- && (dst < tree_root + num_procs_completed)
- && (rank >= tree_root + num_procs_completed))
- {
- MPI_Recv(tmp_buff + dst_tree_root * send_increment,
- mask * num_procs * send_count, send_type, dst,
- tag, comm, &status);
-
- MPI_Get_count(&status, send_type, &last_recv_count);
- curr_size += last_recv_count;
- }
-
- tmp_mask >>= 1;
- k--;
- }
- }
-
- mask <<= 1;
- i++;
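+ /* Each pass trades everything gathered so far with partner rank ^ mask,
+ roughly doubling the data held per step; the subtree code below
+ redistributes data when num_procs is not a power of two. */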
+ while (mask < num_procs) {
+ dst = rank ^ mask;
+ dst_tree_root = dst >> i;
+ dst_tree_root <<= i;
+ rank_tree_root = rank >> i;
+ rank_tree_root <<= i;
+ send_offset = rank_tree_root * send_increment;
+ recv_offset = dst_tree_root * recv_increment;
+
+ if (dst < num_procs) {
+ MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag,
+ tmp_buff + recv_offset, mask * recv_count * num_procs,
+ recv_type, dst, tag, comm, &status);
+
+ MPI_Get_count(&status, recv_type, &last_recv_count);
+ curr_size += last_recv_count;
+ }
+
+
+ if (dst_tree_root + mask > num_procs) {
+
+ num_procs_completed = num_procs - rank_tree_root - mask;
+ /* num_procs_completed is the number of processes in this
+ subtree that have all the data. Send data to others
+ in a tree fashion. First find root of current tree
+ that is being divided into two. k is the number of
+ least-significant bits in this process's rank that
+ must be zeroed out to find the rank of the root */
+
+ j = mask;
+ k = 0;
+ while (j) {
+ j >>= 1;
+ k++;
+ }
+ k--;
+
+ tmp_mask = mask >> 1;
+
+ while (tmp_mask) {
+ dst = rank ^ tmp_mask;
+
+ tree_root = rank >> k;
+ tree_root <<= k;
+
+ /* send only if this proc has data and destination
+ doesn't have data. at any step, multiple processes
+ can send if they have the data */
+
+ if ((dst > rank)
+ && (rank < tree_root + num_procs_completed)
+ && (dst >= tree_root + num_procs_completed)) {
+ MPI_Send(tmp_buff + dst_tree_root * send_increment,
+ last_recv_count, send_type, dst, tag, comm);
+
+ }
+
+ /* recv only if this proc. doesn't have data and sender
+ has data */
+
+ else if ((dst < rank)
+ && (dst < tree_root + num_procs_completed)
+ && (rank >= tree_root + num_procs_completed)) {
+ MPI_Recv(tmp_buff + dst_tree_root * send_increment,
+ mask * num_procs * send_count, send_type, dst,
+ tag, comm, &status);
+
+ MPI_Get_count(&status, send_type, &last_recv_count);
+ curr_size += last_recv_count;
+ }
+
+ tmp_mask >>= 1;
+ k--;
+ }
}
+ mask <<= 1;
+ i++;
+ }
+
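+ /* Copy this rank's blocks out of the scratch buffer into recv_buff,
+ ordered by source rank. */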
for (i = 0; i < num_procs; i++)
- MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent,
- send_count, send_type, rank, tag,
- recv_ptr + (i * recv_count * extent),
- recv_count, recv_type, rank, tag, comm, &status);
+ MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent,
+ send_count, send_type, rank, tag,
+ recv_ptr + (i * recv_count * extent),
+ recv_count, recv_type, rank, tag, comm, &status);
free(tmp_buff);
return success;
}
* Author: Ahmad Faraj
****************************************************************************/
-int
-smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count,
- MPI_Datatype send_type, void * recv_buff,
- int recv_count, MPI_Datatype recv_type,
- MPI_Comm comm)
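+/* Straightforward all-to-all: handle the local block with a self sendrecv
+   and set up one persistent receive and one persistent send request for
+   every other rank. */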
+int smpi_coll_tuned_alltoall_simple(void *send_buff, int send_count,
+ MPI_Datatype send_type,
+ void *recv_buff, int recv_count,
+ MPI_Datatype recv_type,
+ MPI_Comm comm)
{
int i, rank, size, nreqs, err, src, dst, tag = 101;
char *psnd;
MPI_Request *req;
MPI_Request *preq;
MPI_Request *qreq;
- MPI_Status s, * statuses;
+ MPI_Status s, *statuses;
MPI_Comm_size(comm, &size);
/* Allocate arrays of requests. */
nreqs = 2 * (size - 1);
- if (nreqs > 0)
- {
- req = (MPI_Request *) malloc(nreqs * sizeof(MPI_Request));
- statuses = (MPI_Status *) malloc(nreqs * sizeof(MPI_Status));
- if (!req || !statuses)
- {
- free(req);
- free(statuses);
- return 0;
- }
+ if (nreqs > 0) {
+ req = (MPI_Request *) malloc(nreqs * sizeof(MPI_Request));
+ statuses = (MPI_Status *) malloc(nreqs * sizeof(MPI_Status));
+ if (!req || !statuses) {
+ free(req);
+ free(statuses);
+ return 0;
}
- else
+ } else
req = 0;
/* simple optimization */
psnd = ((char *) send_buff) + (rank * sndinc);
prcv = ((char *) recv_buff) + (rank * rcvinc);
- MPI_Sendrecv (psnd, send_count, send_type, rank, tag,
- prcv, recv_count, recv_type,
- rank, tag, comm, &s);
+ MPI_Sendrecv(psnd, send_count, send_type, rank, tag,
+ prcv, recv_count, recv_type, rank, tag, comm, &s);
/* Initiate all send/recv to/from others. */
preq = req;
qreq = req + size - 1;
- prcv = (char*) recv_buff;
- psnd = (char*) send_buff;
- for (i = 0; i < size; i++)
- {
- src = dst = (rank + i) % size;
- if (src == rank) continue;
- if (dst == rank) continue;
- MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src,
- tag, comm, preq++);
- MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst,
- tag, comm, qreq++);
- }
+ prcv = (char *) recv_buff;
+ psnd = (char *) send_buff;
+ for (i = 0; i < size; i++) {
+ src = dst = (rank + i) % size;
+ if (src == rank)
+ continue;
+ if (dst == rank)
+ continue;
+ MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src,
+ tag, comm, preq++);
+ MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst,
+ tag, comm, qreq++);
+ }
/* Start all the requests. */
err = MPI_Request_free(preq);
if (err != MPI_SUCCESS) {
if (req)
- free((char *) req);
+ free((char *) req);
if (statuses)
- free(statuses);
+ free(statuses);
return err;
}
}
free(statuses);
return (1);
}
-
-