X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/7ce270272856fcab2790a35bcefb9d6e0fb52423..cc63d7b267be4c20633a0be7db63b3d88030bee6:/src/smpi/colls/alltoall-3dmesh.c diff --git a/src/smpi/colls/alltoall-3dmesh.c b/src/smpi/colls/alltoall-3dmesh.c index afd47ce6b8..6f43a425a4 100644 --- a/src/smpi/colls/alltoall-3dmesh.c +++ b/src/smpi/colls/alltoall-3dmesh.c @@ -1,4 +1,10 @@ -#include "colls.h" +/* Copyright (c) 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "colls_private.h" #include /***************************************************************************** @@ -24,50 +30,51 @@ * Auther: Ahmad Faraj ****************************************************************************/ -int alltoall_check_is_3dmesh(int num, int * i, int * j, int * k) +static int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k) { int x, max = num / 3; - x = cbrt(num); - * i = * j = * k = 0; - while (x <= max) - { - if ((num % (x * x)) == 0) - { - * i = * j = x; - * k = num / (x * x); - return 1; - } - x++; + x = cbrt(num); + *i = *j = *k = 0; + while (x <= max) { + if ((num % (x * x)) == 0) { + *i = *j = x; + *k = num / (x * x); + return 1; } + x++; + } return 0; } -int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Datatype send_type, - void * recv_buff, int recv_count, MPI_Datatype recv_type, - MPI_Comm comm) +int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, + MPI_Datatype send_type, + void *recv_buff, int recv_count, + MPI_Datatype recv_type, MPI_Comm comm) { - MPI_Request * reqs, * req_ptr; + MPI_Request *reqs, *req_ptr; MPI_Aint extent; - MPI_Status status, * statuses; + MPI_Status status, *statuses; int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count; int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base; - int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1; + int src_z_base, send_offset, recv_offset, tag = COLL_TAG_ALLTOALL; - char * tmp_buff1, * tmp_buff2; + char *tmp_buff1, *tmp_buff2; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z)) - return failure; + return MPI_ERR_OTHER; num_reqs = X; - if (Y > X) num_reqs = Y; - if (Z > Y) num_reqs = Z; + if (Y > X) + num_reqs = Y; + if (Z > Y) + num_reqs = Z; two_dsize = X * Y; - my_z = rank / two_dsize; + my_z = rank / two_dsize; my_row_base = (rank / X) * X; my_col_base = (rank % Y) + (my_z * two_dsize); @@ -75,129 +82,105 @@ int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Dataty block_size = extent * send_count; - tmp_buff1 =(char *) malloc(block_size * num_procs * two_dsize); - if (!tmp_buff1) - { - printf("alltoall-3Dmesh:97: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } - - tmp_buff2 =(char *) malloc(block_size * two_dsize); - if (!tmp_buff2) - { - printf("alltoall-3Dmesh:105: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } - - statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status)); - reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); - if (!reqs) - { - printf("alltoall-3Dmesh:113: cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } - + tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * two_dsize); + tmp_buff2 = (char *) xbt_malloc(block_size * two_dsize); + + statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status)); + reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request)); + req_ptr = reqs; - - send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs; - MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, - tmp_buff1 + recv_offset, num_procs * recv_count, - recv_type, rank, tag, comm, &status); + recv_offset = (rank % two_dsize) * block_size * num_procs; + + smpi_mpi_sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, + tmp_buff1 + recv_offset, num_procs * recv_count, + recv_type, rank, tag, comm, &status); count = send_count * num_procs; - for (i = 0; i < Y; i++) - { - src = i + my_row_base; - if (src == rank) continue; - recv_offset = (src % two_dsize) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, - req_ptr++); - } + for (i = 0; i < Y; i++) { + src = i + my_row_base; + if (src == rank) + continue; + recv_offset = (src % two_dsize) * block_size * num_procs; + *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); + } - for (i = 0; i < Y; i++) - { - dst = i + my_row_base; - if (dst == rank) continue; - MPI_Send(send_buff, count, send_type, dst, tag, comm); - } + for (i = 0; i < Y; i++) { + dst = i + my_row_base; + if (dst == rank) + continue; + smpi_mpi_send(send_buff, count, send_type, dst, tag, comm); + } - MPI_Waitall(Y - 1, reqs, statuses); + smpi_mpi_waitall(Y - 1, reqs, statuses); req_ptr = reqs; - - - for (i = 0; i < X; i++) - { - src = (i * Y + my_col_base); - if (src == rank) continue; - - src_row_base = (src / X) * X; - - recv_offset = (src_row_base % two_dsize) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, - recv_type, src, tag, comm, req_ptr++); - } - send_offset = (my_row_base % two_dsize) * block_size * num_procs; - for (i = 0; i < X; i++) - { - dst = (i * Y + my_col_base); - if (dst == rank) continue; - MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, - dst, tag, comm); - } - - MPI_Waitall(X - 1, reqs, statuses); + + for (i = 0; i < X; i++) { + src = (i * Y + my_col_base); + if (src == rank) + continue; + + src_row_base = (src / X) * X; + + recv_offset = (src_row_base % two_dsize) * block_size * num_procs; + *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, + recv_type, src, tag, comm); + } + + send_offset = (my_row_base % two_dsize) * block_size * num_procs; + for (i = 0; i < X; i++) { + dst = (i * Y + my_col_base); + if (dst == rank) + continue; + smpi_mpi_send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, + dst, tag, comm); + } + + smpi_mpi_waitall(X - 1, reqs, statuses); req_ptr = reqs; - for (i = 0; i < two_dsize; i++) - { - send_offset = (rank * block_size) + (i * block_size * num_procs); - recv_offset = (my_z_base * block_size) + (i * block_size); - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, - recv_buff + recv_offset, recv_count, recv_type, rank, tag, - comm, &status); - } + for (i = 0; i < two_dsize; i++) { + send_offset = (rank * block_size) + (i * block_size * num_procs); + recv_offset = (my_z_base * block_size) + (i * block_size); + smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, + (char *) recv_buff + recv_offset, recv_count, recv_type, + rank, tag, comm, &status); + } - for (i = 1; i < Z; i++) - { - src = (rank + i * two_dsize) % num_procs; - src_z_base = (src / two_dsize) * two_dsize; + for (i = 1; i < Z; i++) { + src = (rank + i * two_dsize) % num_procs; + src_z_base = (src / two_dsize) * two_dsize; - recv_offset = (src_z_base * block_size); + recv_offset = (src_z_base * block_size); - MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type, - src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + recv_offset, recv_count * two_dsize, + recv_type, src, tag, comm); } - for (i = 1; i < Z; i++) - { - dst = (rank + i * two_dsize) % num_procs; - - recv_offset = 0; - for (j = 0; j < two_dsize; j++) - { - send_offset = (dst + j * num_procs) * block_size; - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, - rank, tag, tmp_buff2 + recv_offset, recv_count, - recv_type, rank, tag, comm, &status); - - recv_offset += block_size; - } - - MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); - + for (i = 1; i < Z; i++) { + dst = (rank + i * two_dsize) % num_procs; + + recv_offset = 0; + for (j = 0; j < two_dsize; j++) { + send_offset = (dst + j * num_procs) * block_size; + smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, + rank, tag, tmp_buff2 + recv_offset, recv_count, + recv_type, rank, tag, comm, &status); + + recv_offset += block_size; } - - MPI_Waitall(Z - 1, reqs, statuses); + + smpi_mpi_send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); + + } + + smpi_mpi_waitall(Z - 1, reqs, statuses); free(reqs); free(statuses); free(tmp_buff1); free(tmp_buff2); - return success; + return MPI_SUCCESS; }