From 1b688c83e1d48eda7a3392c2a7f0c3c540e77f7d Mon Sep 17 00:00:00 2001 From: Arnaud Giersch Date: Mon, 25 Mar 2013 22:21:15 +0100 Subject: [PATCH] Reindent files before changes. --- src/smpi/colls/alltoall-2dmesh.c | 251 +++++++++++++++---------------- src/smpi/colls/alltoall-3dmesh.c | 235 ++++++++++++++--------------- src/smpi/colls/alltoall-pair.c | 64 ++++---- src/smpi/colls/alltoall-rdb.c | 204 ++++++++++++------------- src/smpi/colls/alltoall-simple.c | 67 ++++----- 5 files changed, 397 insertions(+), 424 deletions(-) diff --git a/src/smpi/colls/alltoall-2dmesh.c b/src/smpi/colls/alltoall-2dmesh.c index 1e1e408885..2b1a2a05b7 100644 --- a/src/smpi/colls/alltoall-2dmesh.c +++ b/src/smpi/colls/alltoall-2dmesh.c @@ -1,5 +1,6 @@ #include "colls.h" #include + XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi, "Logging specific to SMPI collectives"); @@ -26,47 +27,45 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi, * Auther: Ahmad Faraj ****************************************************************************/ -int alltoall_check_is_2dmesh(int num, int * i, int * j) +int alltoall_check_is_2dmesh(int num, int *i, int *j) { int x, max = num / 2; - x = sqrt(num); - - while (x <= max) - { - if ((num % x) == 0) - { - * i = x; - * j = num / x; - - if (* i > * j) - { - x = * i; - * i = * j; - * j = x; - } - - return 1; - } - x++; + x = sqrt(num); + + while (x <= max) { + if ((num % x) == 0) { + *i = x; + *j = num / x; + + if (*i > *j) { + x = *i; + *i = *j; + *j = x; + } + + return 1; } + x++; + } return 0; } -int -smpi_coll_tuned_alltoall_2dmesh(void * send_buff, int send_count, MPI_Datatype send_type, - void * recv_buff, int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, + MPI_Datatype send_type, + void *recv_buff, int recv_count, + MPI_Datatype recv_type, + MPI_Comm comm) { - MPI_Status * statuses, s; - MPI_Request * reqs, * req_ptr;; + MPI_Status *statuses, s; + MPI_Request *reqs, *req_ptr;; MPI_Aint extent; - char * tmp_buff1, * tmp_buff2; + char *tmp_buff1, *tmp_buff2; int i, j, src, dst, rank, num_procs, count, num_reqs; int rows, cols, my_row, my_col, X, Y, send_offset, recv_offset; int two_dsize, my_row_base, my_col_base, src_row_base, block_size; int tag = 1, failure = 0, success = 1; - + MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &num_procs); MPI_Type_extent(send_type, &extent); @@ -80,125 +79,117 @@ smpi_coll_tuned_alltoall_2dmesh(void * send_buff, int send_count, MPI_Datatype s my_col_base = rank % Y; block_size = extent * send_count; - - tmp_buff1 =(char *) malloc(block_size * num_procs * Y); - if (!tmp_buff1) - { - XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); - MPI_Finalize(); - exit(failure); - } - - tmp_buff2 =(char *) malloc(block_size * Y); - if (!tmp_buff2) - { - XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); - MPI_Finalize(); - exit(failure); - } - + + tmp_buff1 = (char *) malloc(block_size * num_procs * Y); + if (!tmp_buff1) { + XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); + MPI_Finalize(); + exit(failure); + } + + tmp_buff2 = (char *) malloc(block_size * Y); + if (!tmp_buff2) { + XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); + MPI_Finalize(); + exit(failure); + } + num_reqs = X; - if (Y > X) num_reqs = Y; - - statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status)); - reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); - if (!reqs) - { - XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate 
memory"); - MPI_Finalize(); - exit(failure); - } - + if (Y > X) + num_reqs = Y; + + statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status)); + reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); + if (!reqs) { + XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); + MPI_Finalize(); + exit(failure); + } + req_ptr = reqs; send_offset = recv_offset = (rank % Y) * block_size * num_procs; count = send_count * num_procs; - - for (i = 0; i < Y; i++) - { - src = i + my_row_base; - if (src == rank) - continue; - - recv_offset = (src % Y) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, - req_ptr++); - } - - for (i = 0; i < Y; i++) - { - dst = i + my_row_base; - if (dst == rank) - continue; - MPI_Send(send_buff, count, send_type, dst, tag, comm); - } - + + for (i = 0; i < Y; i++) { + src = i + my_row_base; + if (src == rank) + continue; + + recv_offset = (src % Y) * block_size * num_procs; + MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, + req_ptr++); + } + + for (i = 0; i < Y; i++) { + dst = i + my_row_base; + if (dst == rank) + continue; + MPI_Send(send_buff, count, send_type, dst, tag, comm); + } + MPI_Waitall(Y - 1, reqs, statuses); req_ptr = reqs; - - for (i = 0; i < Y; i++) - { - send_offset = (rank * block_size) + (i * block_size * num_procs); - recv_offset = (my_row_base * block_size) + (i * block_size); - - if (i + my_row_base == rank) - MPI_Sendrecv (send_buff + recv_offset, send_count, send_type, - rank, tag, recv_buff + recv_offset, recv_count, - recv_type, rank, tag, comm, &s); - - else - MPI_Sendrecv (tmp_buff1 + send_offset, send_count, send_type, - rank, tag, - recv_buff + recv_offset, recv_count, recv_type, - rank, tag, comm, &s); - } - - for (i = 0; i < X; i++) - { - src = (i * Y + my_col_base); - if (src == rank) - continue; - src_row_base = (src / Y) * Y; + for (i = 0; i < Y; i++) { + send_offset = (rank * block_size) + (i * block_size * num_procs); + recv_offset = (my_row_base * block_size) + (i * block_size); - MPI_Irecv(recv_buff + src_row_base * block_size, recv_count * Y, - recv_type, src, tag, comm, req_ptr++); + if (i + my_row_base == rank) + MPI_Sendrecv(send_buff + recv_offset, send_count, send_type, + rank, tag, recv_buff + recv_offset, recv_count, + recv_type, rank, tag, comm, &s); + + else + MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, + rank, tag, + recv_buff + recv_offset, recv_count, recv_type, + rank, tag, comm, &s); + } + + + for (i = 0; i < X; i++) { + src = (i * Y + my_col_base); + if (src == rank) + continue; + src_row_base = (src / Y) * Y; + + MPI_Irecv(recv_buff + src_row_base * block_size, recv_count * Y, + recv_type, src, tag, comm, req_ptr++); } - - for (i = 0; i < X; i++) - { - dst = (i * Y + my_col_base); - if (dst == rank) - continue; - - recv_offset = 0; - for (j = 0; j < Y; j++) - { - send_offset = (dst + j * num_procs) * block_size; - - if (j + my_row_base == rank) - MPI_Sendrecv (send_buff + dst * block_size, send_count, send_type, - rank, tag, - tmp_buff2 + recv_offset, recv_count, recv_type, - rank, tag, comm, &s); - else - MPI_Sendrecv (tmp_buff1 + send_offset, send_count, send_type, - rank, tag, - tmp_buff2 + recv_offset, recv_count, recv_type, - rank, tag, comm, &s); - - recv_offset += block_size; - } - - MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm); + + for (i = 0; i < X; i++) { + dst = (i * Y + my_col_base); + if (dst == rank) + continue; + + recv_offset = 0; + for (j = 0; j < Y; j++) { + send_offset = (dst + 
j * num_procs) * block_size; + + if (j + my_row_base == rank) + MPI_Sendrecv(send_buff + dst * block_size, send_count, send_type, + rank, tag, + tmp_buff2 + recv_offset, recv_count, recv_type, + rank, tag, comm, &s); + else + MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, + rank, tag, + tmp_buff2 + recv_offset, recv_count, recv_type, + rank, tag, comm, &s); + + recv_offset += block_size; } + + MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm); + } MPI_Waitall(X - 1, reqs, statuses); free(reqs); free(statuses); free(tmp_buff1); - free(tmp_buff2); + free(tmp_buff2); return success; } diff --git a/src/smpi/colls/alltoall-3dmesh.c b/src/smpi/colls/alltoall-3dmesh.c index afd47ce6b8..ed734e5b1f 100644 --- a/src/smpi/colls/alltoall-3dmesh.c +++ b/src/smpi/colls/alltoall-3dmesh.c @@ -24,50 +24,52 @@ * Auther: Ahmad Faraj ****************************************************************************/ -int alltoall_check_is_3dmesh(int num, int * i, int * j, int * k) +int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k) { int x, max = num / 3; - x = cbrt(num); - * i = * j = * k = 0; - while (x <= max) - { - if ((num % (x * x)) == 0) - { - * i = * j = x; - * k = num / (x * x); - return 1; - } - x++; + x = cbrt(num); + *i = *j = *k = 0; + while (x <= max) { + if ((num % (x * x)) == 0) { + *i = *j = x; + *k = num / (x * x); + return 1; } + x++; + } return 0; } -int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Datatype send_type, - void * recv_buff, int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, + MPI_Datatype send_type, + void *recv_buff, int recv_count, + MPI_Datatype recv_type, + MPI_Comm comm) { - MPI_Request * reqs, * req_ptr; + MPI_Request *reqs, *req_ptr; MPI_Aint extent; - MPI_Status status, * statuses; + MPI_Status status, *statuses; int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count; int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base; int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1; - char * tmp_buff1, * tmp_buff2; + char *tmp_buff1, *tmp_buff2; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &num_procs); MPI_Type_extent(send_type, &extent); if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z)) - return failure; + return failure; num_reqs = X; - if (Y > X) num_reqs = Y; - if (Z > Y) num_reqs = Z; + if (Y > X) + num_reqs = Y; + if (Z > Y) + num_reqs = Z; two_dsize = X * Y; - my_z = rank / two_dsize; + my_z = rank / two_dsize; my_row_base = (rank / X) * X; my_col_base = (rank % Y) + (my_z * two_dsize); @@ -75,124 +77,117 @@ int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Dataty block_size = extent * send_count; - tmp_buff1 =(char *) malloc(block_size * num_procs * two_dsize); - if (!tmp_buff1) - { - printf("alltoall-3Dmesh:97: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } - - tmp_buff2 =(char *) malloc(block_size * two_dsize); - if (!tmp_buff2) - { - printf("alltoall-3Dmesh:105: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } - - statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status)); - reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); - if (!reqs) - { - printf("alltoall-3Dmesh:113: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } - + tmp_buff1 = (char *) malloc(block_size * num_procs * two_dsize); + if (!tmp_buff1) { + printf("alltoall-3Dmesh:97: cannot allocate memory\n"); + 
MPI_Finalize(); + exit(failure); + } + + tmp_buff2 = (char *) malloc(block_size * two_dsize); + if (!tmp_buff2) { + printf("alltoall-3Dmesh:105: cannot allocate memory\n"); + MPI_Finalize(); + exit(failure); + } + + statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status)); + reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); + if (!reqs) { + printf("alltoall-3Dmesh:113: cannot allocate memory\n"); + MPI_Finalize(); + exit(failure); + } + req_ptr = reqs; - + send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs; - MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, - tmp_buff1 + recv_offset, num_procs * recv_count, - recv_type, rank, tag, comm, &status); + MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, + tmp_buff1 + recv_offset, num_procs * recv_count, + recv_type, rank, tag, comm, &status); count = send_count * num_procs; - for (i = 0; i < Y; i++) - { - src = i + my_row_base; - if (src == rank) continue; - recv_offset = (src % two_dsize) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, - req_ptr++); - } + for (i = 0; i < Y; i++) { + src = i + my_row_base; + if (src == rank) + continue; + recv_offset = (src % two_dsize) * block_size * num_procs; + MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, + req_ptr++); + } - for (i = 0; i < Y; i++) - { - dst = i + my_row_base; - if (dst == rank) continue; - MPI_Send(send_buff, count, send_type, dst, tag, comm); - } + for (i = 0; i < Y; i++) { + dst = i + my_row_base; + if (dst == rank) + continue; + MPI_Send(send_buff, count, send_type, dst, tag, comm); + } MPI_Waitall(Y - 1, reqs, statuses); req_ptr = reqs; - - - for (i = 0; i < X; i++) - { - src = (i * Y + my_col_base); - if (src == rank) continue; - - src_row_base = (src / X) * X; - - recv_offset = (src_row_base % two_dsize) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, - recv_type, src, tag, comm, req_ptr++); - } - send_offset = (my_row_base % two_dsize) * block_size * num_procs; - for (i = 0; i < X; i++) - { - dst = (i * Y + my_col_base); - if (dst == rank) continue; - MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, - dst, tag, comm); - } - + + for (i = 0; i < X; i++) { + src = (i * Y + my_col_base); + if (src == rank) + continue; + + src_row_base = (src / X) * X; + + recv_offset = (src_row_base % two_dsize) * block_size * num_procs; + MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, + recv_type, src, tag, comm, req_ptr++); + } + + send_offset = (my_row_base % two_dsize) * block_size * num_procs; + for (i = 0; i < X; i++) { + dst = (i * Y + my_col_base); + if (dst == rank) + continue; + MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, + dst, tag, comm); + } + MPI_Waitall(X - 1, reqs, statuses); req_ptr = reqs; - for (i = 0; i < two_dsize; i++) - { - send_offset = (rank * block_size) + (i * block_size * num_procs); - recv_offset = (my_z_base * block_size) + (i * block_size); - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, - recv_buff + recv_offset, recv_count, recv_type, rank, tag, - comm, &status); - } + for (i = 0; i < two_dsize; i++) { + send_offset = (rank * block_size) + (i * block_size * num_procs); + recv_offset = (my_z_base * block_size) + (i * block_size); + MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, + recv_buff + recv_offset, recv_count, recv_type, rank, tag, + comm, &status); + } - 
for (i = 1; i < Z; i++) - { - src = (rank + i * two_dsize) % num_procs; - src_z_base = (src / two_dsize) * two_dsize; + for (i = 1; i < Z; i++) { + src = (rank + i * two_dsize) % num_procs; + src_z_base = (src / two_dsize) * two_dsize; - recv_offset = (src_z_base * block_size); + recv_offset = (src_z_base * block_size); - MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type, - src, tag, comm, req_ptr++); + MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type, + src, tag, comm, req_ptr++); } - for (i = 1; i < Z; i++) - { - dst = (rank + i * two_dsize) % num_procs; - - recv_offset = 0; - for (j = 0; j < two_dsize; j++) - { - send_offset = (dst + j * num_procs) * block_size; - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, - rank, tag, tmp_buff2 + recv_offset, recv_count, - recv_type, rank, tag, comm, &status); - - recv_offset += block_size; - } - - MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); - + for (i = 1; i < Z; i++) { + dst = (rank + i * two_dsize) % num_procs; + + recv_offset = 0; + for (j = 0; j < two_dsize; j++) { + send_offset = (dst + j * num_procs) * block_size; + MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, + rank, tag, tmp_buff2 + recv_offset, recv_count, + recv_type, rank, tag, comm, &status); + + recv_offset += block_size; } - + + MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); + + } + MPI_Waitall(Z - 1, reqs, statuses); free(reqs); diff --git a/src/smpi/colls/alltoall-pair.c b/src/smpi/colls/alltoall-pair.c index 83658e02c0..b54b226e69 100644 --- a/src/smpi/colls/alltoall-pair.c +++ b/src/smpi/colls/alltoall-pair.c @@ -1,4 +1,5 @@ #include "smpi/mpi.h" + /***************************************************************************** * Function: alltoall_pair @@ -21,10 +22,9 @@ ****************************************************************************/ /* -int -alltoall_pair(void * send_buff, int send_count, MPI_Datatype send_type, - void * recv_buff, int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int alltoall_pair(void *send_buff, int send_count, MPI_Datatype send_type, + void *recv_buff, int recv_count, MPI_Datatype recv_type, + MPI_Comm comm) { MPI_Aint send_chunk, recv_chunk; @@ -34,36 +34,36 @@ alltoall_pair(void * send_buff, int send_count, MPI_Datatype send_type, int i, src, dst, rank, num_procs; int tag = 1, success = 1, failure = 0, pof2 = 1; - char * send_ptr = (char *) send_buff; - char * recv_ptr = (char *) recv_buff; - + char *send_ptr = (char *) send_buff; + char *recv_ptr = (char *) recv_buff; + MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &num_procs); MPI_Type_extent(send_type, &send_chunk); MPI_Type_extent(recv_type, &recv_chunk); - MPI_Win_create(recv_buff, num_procs*recv_chunk*send_count,recv_chunk,0, - comm, &win); + MPI_Win_create(recv_buff, num_procs * recv_chunk * send_count, recv_chunk, 0, + comm, &win); send_chunk *= send_count; - recv_chunk *= recv_count; + recv_chunk *= recv_count; MPI_Win_fence(assert, win); - for (i = 0; i < num_procs; i++) - { - src = dst = rank ^ i; - MPI_Put(send_ptr + dst * send_chunk, send_count, send_type, dst, - rank*send_chunk, send_count, send_type, win); - } - MPI_Win_fence (assert, win); + for (i = 0; i < num_procs; i++) { + src = dst = rank ^ i; + MPI_Put(send_ptr + dst * send_chunk, send_count, send_type, dst, + rank * send_chunk, send_count, send_type, win); + } + MPI_Win_fence(assert, win); MPI_Win_free(&win); return 0; } */ -int -smpi_coll_tuned_alltoall_pair(void * send_buff, int 
send_count, MPI_Datatype send_type, - void * recv_buff, int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int smpi_coll_tuned_alltoall_pair(void *send_buff, int send_count, + MPI_Datatype send_type, + void *recv_buff, int recv_count, + MPI_Datatype recv_type, + MPI_Comm comm) { MPI_Aint send_chunk, recv_chunk; @@ -71,25 +71,23 @@ smpi_coll_tuned_alltoall_pair(void * send_buff, int send_count, MPI_Datatype sen int i, src, dst, rank, num_procs; int tag = 1, success = 1; - char * send_ptr = (char *) send_buff; - char * recv_ptr = (char *) recv_buff; - + char *send_ptr = (char *) send_buff; + char *recv_ptr = (char *) recv_buff; + MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &num_procs); MPI_Type_extent(send_type, &send_chunk); MPI_Type_extent(recv_type, &recv_chunk); send_chunk *= send_count; - recv_chunk *= recv_count; + recv_chunk *= recv_count; - for (i = 0; i < num_procs; i++) - { - src = dst = rank ^ i; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, - tag, recv_ptr + src * recv_chunk, recv_count, recv_type, - src, tag, comm, &s); - } + for (i = 0; i < num_procs; i++) { + src = dst = rank ^ i; + MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + tag, recv_ptr + src * recv_chunk, recv_count, recv_type, + src, tag, comm, &s); + } return success; } - diff --git a/src/smpi/colls/alltoall-rdb.c b/src/smpi/colls/alltoall-rdb.c index b5bd8a52fb..bbd337868f 100644 --- a/src/smpi/colls/alltoall-rdb.c +++ b/src/smpi/colls/alltoall-rdb.c @@ -1,4 +1,5 @@ #include "colls.h" + /***************************************************************************** * Function: alltoall_rdb @@ -20,10 +21,11 @@ * Auther: MPICH / slightly modified by Ahmad Faraj. ****************************************************************************/ -int -smpi_coll_tuned_alltoall_rdb(void * send_buff, int send_count, MPI_Datatype send_type, - void * recv_buff, int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, + MPI_Datatype send_type, + void *recv_buff, int recv_count, + MPI_Datatype recv_type, + MPI_Comm comm) { /* MPI variables */ MPI_Status status; @@ -34,9 +36,9 @@ smpi_coll_tuned_alltoall_rdb(void * send_buff, int send_count, MPI_Datatype send int last_recv_count, tmp_mask, tree_root, num_procs_completed; int tag = 1, mask = 1, success = 1, failure = 0, c = 0, i = 0; - char * tmp_buff; - char * send_ptr = (char *) send_buff; - char * recv_ptr = (char *) recv_buff; + char *tmp_buff; + char *send_ptr = (char *) send_buff; + char *recv_ptr = (char *) recv_buff; MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm, &rank); @@ -46,16 +48,15 @@ smpi_coll_tuned_alltoall_rdb(void * send_buff, int send_count, MPI_Datatype send send_increment *= (send_count * num_procs); recv_increment *= (recv_count * num_procs); - + max_size = num_procs * recv_increment; - - tmp_buff = (char * ) malloc(max_size); - if (!tmp_buff) - { - printf("alltoall-rdb:56: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } + + tmp_buff = (char *) malloc(max_size); + if (!tmp_buff) { + printf("alltoall-rdb:56: cannot allocate memory\n"); + MPI_Finalize(); + exit(failure); + } curr_size = send_count * num_procs; @@ -63,98 +64,91 @@ smpi_coll_tuned_alltoall_rdb(void * send_buff, int send_count, MPI_Datatype send tmp_buff + (rank * recv_increment), curr_size, recv_type, rank, tag, comm, &status); - while (mask < num_procs) - { - dst = rank ^ mask; - dst_tree_root = dst >> i; - dst_tree_root <<= i; - rank_tree_root = 
rank >> i; - rank_tree_root <<= i; - send_offset = rank_tree_root * send_increment; - recv_offset = dst_tree_root * recv_increment; - - if (dst < num_procs) - { - MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag, - tmp_buff + recv_offset, mask * recv_count * num_procs, - recv_type, dst, tag, comm, &status); - - MPI_Get_count(&status, recv_type, &last_recv_count); - curr_size += last_recv_count; - } - - - if (dst_tree_root + mask > num_procs) - { - - num_procs_completed = num_procs - rank_tree_root - mask; - /* num_procs_completed is the number of processes in this - subtree that have all the data. Send data to others - in a tree fashion. First find root of current tree - that is being divided into two. k is the number of - least-significant bits in this process's rank that - must be zeroed out to find the rank of the root */ - - j = mask; - k = 0; - while (j) - { - j >>= 1; - k++; - } - k--; - - tmp_mask = mask >> 1; - - while (tmp_mask) - { - dst = rank ^ tmp_mask; - - tree_root = rank >> k; - tree_root <<= k; - - /* send only if this proc has data and destination - doesn't have data. at any step, multiple processes - can send if they have the data */ - - if ((dst > rank) - && (rank < tree_root + num_procs_completed) - && (dst >= tree_root + num_procs_completed)) - { - MPI_Send(tmp_buff + dst_tree_root * send_increment, - last_recv_count, send_type, dst, tag, comm); - - } - - /* recv only if this proc. doesn't have data and sender - has data */ - - else if ((dst < rank) - && (dst < tree_root + num_procs_completed) - && (rank >= tree_root + num_procs_completed)) - { - MPI_Recv(tmp_buff + dst_tree_root * send_increment, - mask * num_procs * send_count, send_type, dst, - tag, comm, &status); - - MPI_Get_count(&status, send_type, &last_recv_count); - curr_size += last_recv_count; - } - - tmp_mask >>= 1; - k--; - } - } - - mask <<= 1; - i++; + while (mask < num_procs) { + dst = rank ^ mask; + dst_tree_root = dst >> i; + dst_tree_root <<= i; + rank_tree_root = rank >> i; + rank_tree_root <<= i; + send_offset = rank_tree_root * send_increment; + recv_offset = dst_tree_root * recv_increment; + + if (dst < num_procs) { + MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag, + tmp_buff + recv_offset, mask * recv_count * num_procs, + recv_type, dst, tag, comm, &status); + + MPI_Get_count(&status, recv_type, &last_recv_count); + curr_size += last_recv_count; + } + + + if (dst_tree_root + mask > num_procs) { + + num_procs_completed = num_procs - rank_tree_root - mask; + /* num_procs_completed is the number of processes in this + subtree that have all the data. Send data to others + in a tree fashion. First find root of current tree + that is being divided into two. k is the number of + least-significant bits in this process's rank that + must be zeroed out to find the rank of the root */ + + j = mask; + k = 0; + while (j) { + j >>= 1; + k++; + } + k--; + + tmp_mask = mask >> 1; + + while (tmp_mask) { + dst = rank ^ tmp_mask; + + tree_root = rank >> k; + tree_root <<= k; + + /* send only if this proc has data and destination + doesn't have data. at any step, multiple processes + can send if they have the data */ + + if ((dst > rank) + && (rank < tree_root + num_procs_completed) + && (dst >= tree_root + num_procs_completed)) { + MPI_Send(tmp_buff + dst_tree_root * send_increment, + last_recv_count, send_type, dst, tag, comm); + + } + + /* recv only if this proc. 
doesn't have data and sender + has data */ + + else if ((dst < rank) + && (dst < tree_root + num_procs_completed) + && (rank >= tree_root + num_procs_completed)) { + MPI_Recv(tmp_buff + dst_tree_root * send_increment, + mask * num_procs * send_count, send_type, dst, + tag, comm, &status); + + MPI_Get_count(&status, send_type, &last_recv_count); + curr_size += last_recv_count; + } + + tmp_mask >>= 1; + k--; + } } + mask <<= 1; + i++; + } + for (i = 0; i < num_procs; i++) - MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent, - send_count, send_type, rank, tag, - recv_ptr + (i * recv_count * extent), - recv_count, recv_type, rank, tag, comm, &status); + MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent, + send_count, send_type, rank, tag, + recv_ptr + (i * recv_count * extent), + recv_count, recv_type, rank, tag, comm, &status); free(tmp_buff); return success; } diff --git a/src/smpi/colls/alltoall-simple.c b/src/smpi/colls/alltoall-simple.c index df2be4e102..b8c72540ca 100644 --- a/src/smpi/colls/alltoall-simple.c +++ b/src/smpi/colls/alltoall-simple.c @@ -22,11 +22,11 @@ * Auther: Ahmad Faraj ****************************************************************************/ -int -smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count, - MPI_Datatype send_type, void * recv_buff, - int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int smpi_coll_tuned_alltoall_simple(void *send_buff, int send_count, + MPI_Datatype send_type, + void *recv_buff, int recv_count, + MPI_Datatype recv_type, + MPI_Comm comm) { int i, rank, size, nreqs, err, src, dst, tag = 101; char *psnd; @@ -36,7 +36,7 @@ smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count, MPI_Request *req; MPI_Request *preq; MPI_Request *qreq; - MPI_Status s, * statuses; + MPI_Status s, *statuses; MPI_Comm_size(comm, &size); @@ -49,45 +49,42 @@ smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count, /* Allocate arrays of requests. */ nreqs = 2 * (size - 1); - if (nreqs > 0) - { - req = (MPI_Request *) malloc(nreqs * sizeof(MPI_Request)); - statuses = (MPI_Status *) malloc(nreqs * sizeof(MPI_Status)); - if (!req || !statuses) - { - free(req); - free(statuses); - return 0; - } + if (nreqs > 0) { + req = (MPI_Request *) malloc(nreqs * sizeof(MPI_Request)); + statuses = (MPI_Status *) malloc(nreqs * sizeof(MPI_Status)); + if (!req || !statuses) { + free(req); + free(statuses); + return 0; } - else + } else req = 0; /* simple optimization */ psnd = ((char *) send_buff) + (rank * sndinc); prcv = ((char *) recv_buff) + (rank * rcvinc); - MPI_Sendrecv (psnd, send_count, send_type, rank, tag, - prcv, recv_count, recv_type, - rank, tag, comm, &s); + MPI_Sendrecv(psnd, send_count, send_type, rank, tag, + prcv, recv_count, recv_type, rank, tag, comm, &s); /* Initiate all send/recv to/from others. 
*/ preq = req; qreq = req + size - 1; - prcv = (char*) recv_buff; - psnd = (char*) send_buff; - for (i = 0; i < size; i++) - { - src = dst = (rank + i) % size; - if (src == rank) continue; - if (dst == rank) continue; - MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src, - tag, comm, preq++); - MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst, - tag, comm, qreq++); - } + prcv = (char *) recv_buff; + psnd = (char *) send_buff; + for (i = 0; i < size; i++) { + src = dst = (rank + i) % size; + if (src == rank) + continue; + if (dst == rank) + continue; + MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src, + tag, comm, preq++); + MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst, + tag, comm, qreq++); + } /* Start all the requests. */ @@ -107,9 +104,9 @@ smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count, err = MPI_Request_free(preq); if (err != MPI_SUCCESS) { if (req) - free((char *) req); + free((char *) req); if (statuses) - free(statuses); + free(statuses); return err; } } @@ -122,5 +119,3 @@ smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count, free(statuses); return (1); } - - -- 2.20.1
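Note: for reference, a minimal standalone sketch (not part of the applied diff) of the pairwise exchange pattern that smpi_coll_tuned_alltoall_pair() in alltoall-pair.c relies on. It assumes the communicator size is a power of two, so that rank ^ i enumerates every peer exactly once; the function name alltoall_pair_sketch is illustrative and does not exist in the tree.

#include <mpi.h>

/* Minimal sketch of the pairwise (XOR) exchange used by alltoall-pair.c.
   Assumes num_procs is a power of two. */
static void alltoall_pair_sketch(const char *send_buff, char *recv_buff,
                                 int count, MPI_Datatype type, MPI_Comm comm)
{
  int rank, num_procs, i, peer, tag = 1;
  MPI_Aint extent;
  MPI_Status status;

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &num_procs);
  MPI_Type_extent(type, &extent);   /* same (deprecated) call the patched files use */

  for (i = 0; i < num_procs; i++) {
    peer = rank ^ i;                /* step i pairs the ranks into disjoint couples */
    MPI_Sendrecv(send_buff + peer * count * extent, count, type, peer, tag,
                 recv_buff + peer * count * extent, count, type, peer, tag,
                 comm, &status);
  }
}

Because each step is a perfect matching of the ranks, a single blocking MPI_Sendrecv per iteration cannot deadlock, which is why the reindented implementation gets by without any request bookkeeping.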