From a4eb1ae8b7be9ad432765f2a6a3d16c115c03603 Mon Sep 17 00:00:00 2001
From: jean-noel quintin
Date: Tue, 9 Oct 2012 09:12:28 +0100
Subject: [PATCH 1/1] [smpi,example] an example of matrix multiplication with non-contiguous memory

This example exercises matrix multiplication over non-contiguous memory and
should increase the SMPI test coverage.
---
 buildtools/Cmake/MakeExe.cmake           |   1 +
 examples/smpi/MM/2.5D_MM.c               | 278 +++++++++++++++++++++++
 examples/smpi/MM/2.5D_MM.h               |   6 +
 examples/smpi/MM/CMakeLists.txt          |  46 ++++
 examples/smpi/MM/MM_mpi.c                | 178 +++++++++++++++
 examples/smpi/MM/Matrix_init.c           | 194 ++++++++++++++++
 examples/smpi/MM/Matrix_init.h           |  19 ++
 examples/smpi/MM/Summa.c                 | 157 +++++++++++++
 examples/smpi/MM/Summa.h                 |  12 +
 examples/smpi/MM/command_exemple         |   1 +
 examples/smpi/MM/host                    |   4 +
 examples/smpi/MM/param.c                 | 193 ++++++++++++++++
 examples/smpi/MM/param.h                 |  35 +++
 examples/smpi/MM/param_template_file.txt |   9 +
 examples/smpi/MM/timer.c                 |  35 +++
 examples/smpi/MM/timer.h                 |  16 ++
 examples/smpi/MM/topo.c                  |  67 ++++++
 examples/smpi/MM/topo.h                  |  16 ++
 18 files changed, 1267 insertions(+)
 create mode 100644 examples/smpi/MM/2.5D_MM.c
 create mode 100644 examples/smpi/MM/2.5D_MM.h
 create mode 100644 examples/smpi/MM/CMakeLists.txt
 create mode 100644 examples/smpi/MM/MM_mpi.c
 create mode 100644 examples/smpi/MM/Matrix_init.c
 create mode 100644 examples/smpi/MM/Matrix_init.h
 create mode 100644 examples/smpi/MM/Summa.c
 create mode 100644 examples/smpi/MM/Summa.h
 create mode 100644 examples/smpi/MM/command_exemple
 create mode 100644 examples/smpi/MM/host
 create mode 100644 examples/smpi/MM/param.c
 create mode 100644 examples/smpi/MM/param.h
 create mode 100644 examples/smpi/MM/param_template_file.txt
 create mode 100644 examples/smpi/MM/timer.c
 create mode 100644 examples/smpi/MM/timer.h
 create mode 100644 examples/smpi/MM/topo.c
 create mode 100644 examples/smpi/MM/topo.h

diff --git a/buildtools/Cmake/MakeExe.cmake b/buildtools/Cmake/MakeExe.cmake
index 57d7da86a0..41f8d0e70b 100644
--- a/buildtools/Cmake/MakeExe.cmake
+++ b/buildtools/Cmake/MakeExe.cmake
@@ -80,3 +80,4 @@ add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/properties)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/scheduling)
 
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/smpi)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/smpi/MM)
diff --git a/examples/smpi/MM/2.5D_MM.c b/examples/smpi/MM/2.5D_MM.c
new file mode 100644
index 0000000000..63e4058619
--- /dev/null
+++ b/examples/smpi/MM/2.5D_MM.c
@@ -0,0 +1,278 @@
+/*!
+ * 2.5D Block Matrix Multiplication example + * + * Authors: Quintin Jean-Noël + */ + +#include "Matrix_init.h" +#include "Summa.h" +#include "timer.h" +#include +#include "xbt/log.h" +#define CHECK_25D 1 + + XBT_LOG_NEW_DEFAULT_CATEGORY(25D_MM, + "Messages specific for this msg example"); + +double two_dot_five( + size_t m, size_t k, size_t n, + size_t Block_size, size_t group, size_t key, + size_t size_row, size_t size_col, size_t NB_groups ){ + double *a, *b, *c, *res; + /* Split the communicator into groups */ + + /* Find out my identity in the default communicator */ + int myrank; + int NB_proc; + int err; + int useless = 0; + + + double time, communication_time = 0; + struct timespec start_time, end_time; //time mesure + struct timespec end_time_intern; //time mesure + struct timespec start_time_reduce, end_time_reduce; //time mesure + + MPI_Comm my_world; + + if ( group >= NB_groups ){ + XBT_DEBUG( + "Not enough group NB_groups : %zu my group id : %zu\n", + NB_groups, group); + MPI_Comm_split(MPI_COMM_WORLD, 0, key, &my_world); + return -1; + }else{ + MPI_Comm_split(MPI_COMM_WORLD, 1, key, &my_world); + } + + MPI_Comm_size (my_world, &NB_proc); + + if ( NB_proc < (int)(size_row*size_col*NB_groups) ){ + XBT_INFO( + "Not enough processors NB_proc : %d required : %zu\n", + NB_proc, size_row*size_col*NB_groups); + return -1; + } + + + + MPI_Comm group_comm; + MPI_Comm_split(my_world, group, key, &group_comm); + + MPI_Comm_rank(group_comm, &myrank); + MPI_Comm_size (group_comm, &NB_proc); + /* for each group start the execution of his */ + + + NB_proc=size_row*size_col; + size_t row = myrank / size_row; + size_t col = myrank % size_row; + + + + + /*-------------------------Check some mandatory conditions------------------*/ + size_t NB_Block = k / Block_size; + if ( k % Block_size != 0 ){ + XBT_INFO( + "The matrix size has to be proportionnal to the number\ + of blocks: %zu\n", NB_Block); + return -1; + } + + if ( size_row > NB_Block || size_col > NB_Block ){ + XBT_INFO( + "Number of blocks is too small compare to the number of" + " processors (%zu,%zu) in a row or a col (%zu)\n", + size_col, size_row, NB_Block); + return -1; + } + + if ( NB_Block % size_row != 0 || NB_Block % size_col != 0){ + XBT_INFO("The number of Block by processor is not an %s", + "integer\n"); + return -1; + } + + + + if(row >= size_col || col >= size_row){ + XBT_INFO( "I'm useless bye!!! 
col: %zu row: %zu, " + "size_col: %zu , size_row: %zu \n", + col,row,size_col,size_row); + useless = 1; + } + + + if(useless == 1){ + /*----------------------Prepare the Communication Layer-------------------*/ + /* add useless processor on a new color to execute the matrix + * multiplication with the other processors*/ + + /* Split comm size_to row and column comms */ + MPI_Comm row_comm, col_comm, group_line; + MPI_Comm_split(my_world, myrank, MPI_UNDEFINED, &group_line); + /* color by row, rank by column */ + MPI_Comm_split(group_comm, size_row, MPI_UNDEFINED, &row_comm); + /* color by column, rank by row */ + MPI_Comm_split(group_comm, size_col, MPI_UNDEFINED, &col_comm); + /*------------------------Communication Layer can be used-----------------*/ + + return 0; + } + XBT_DEBUG( "I'm initialized col: %zu row: %zu, " + "size_col: %zu , size_row: %zu, my rank: %d \n" + ,col,row,size_col,size_row, myrank); + + + + /*------------------------Initialize the matrices---------------------------*/ + + /* think about a common interface + * int pdgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, + * m, n, k, alpha, a, ia, ja, lda, b, ib, jb, ldb, + * beta, c, ldc, Comm, rank ); + */ + + /*------------------------Prepare the Communication Layer-------------------*/ + /* Split comm size_to row and column comms */ + MPI_Comm row_comm, col_comm, group_line; + MPI_Comm_split(my_world, myrank, group, &group_line); + /* color by row, rank by column */ + MPI_Comm_split(group_comm, row, col, &row_comm); + /* color by column, rank by row */ + MPI_Comm_split(group_comm, col, row, &col_comm); + /*-------------------------Communication Layer can be used------------------*/ + + // matrix sizes + m = m / size_col; + n = n / size_row; + size_t k_a = k / size_row; + size_t k_b = k / size_col; + + + + /*only on the group 0*/ + if( group == 0 ) { + matrices_initialisation(&a, &b, &c, m, k_a, k_b, n, row, col); + if( NB_groups > 1 ) res = malloc( sizeof(double)*m*n ); + } else matrices_allocation(&a, &b, &c, m, k_a, k_b, n); + + + /*-------------------Configuration for Summa algorihtm--------------------*/ + /*--------------------Allocation of matrices block-------------------------*/ + double *a_Summa, *b_Summa; + blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n); + + /*--------------------Communication types for MPI--------------------------*/ + MPI_Datatype Block_a; + MPI_Datatype Block_a_local; + MPI_Datatype Block_b; + MPI_Type_vector(m , Block_size, k_a, MPI_DOUBLE, &Block_a); + MPI_Type_vector(m , Block_size, Block_size, MPI_DOUBLE, &Block_a_local); + MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b); + MPI_Type_commit(&Block_a); + MPI_Type_commit(&Block_a_local); + MPI_Type_commit(&Block_b); + /*-------------Communication types for MPI are configured------------------*/ + + + + + MPI_Barrier(my_world); + get_time(&start_time); + if( NB_groups > 1 ) { + err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line); + if (err != MPI_SUCCESS) { + perror("Error Bcast A\n"); + return -1; + } + err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line); + if (err != MPI_SUCCESS) { + perror("Error Bcast B\n"); + return -1; + } + MPI_Barrier(my_world); + } + get_time(&end_time_intern); + communication_time += get_timediff(&start_time,&end_time_intern); + + XBT_INFO( "group %zu NB_block: %zu, NB_groups %zu\n" + ,group,NB_Block, NB_groups); + XBT_INFO( + "m %zu, k_a %zu, k_b %zu, n %zu, Block_size %zu, " + "group*NB_Block/NB_groups %zu, " + "(group+1)*NB_Block/NB_groups %zu, row %zu, col %zu," + "size_row %zu, 
size_col %zu\n" , + m, k_a, k_b, n, Block_size, + group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups, + row, col, size_row, size_col); + + + Summa( a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size, + group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups, + row, col, size_row, size_col, a_Summa, b_Summa, Block_a, + Block_a_local, Block_b, row_comm, col_comm, 0); + + /*-------------------------End Summa algorihtm-----------------------------*/ + + MPI_Comm_rank(group_line, &myrank); + + MPI_Barrier(my_world); + get_time(&start_time_reduce); + if( NB_groups > 1 ) { + // a gather is better? + err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line); + if (err != MPI_SUCCESS) { + perror("Error Bcast A\n"); + return -1; + } + }else{ + double *swap= c; + c = res; + res=swap; + } + MPI_Barrier(my_world); + get_time(&end_time_reduce); + + MPI_Barrier(my_world); + get_time(&end_time); + time = get_timediff(&start_time,&end_time); + double reduce_time = get_timediff(&start_time_reduce,&end_time_reduce); + printf("communication time: %le reduce time: %le nanoseconds, " + "total time: %le nanoseconds\n",communication_time,reduce_time,time); + MPI_Barrier(my_world); + +#if CHECK_25D + if(myrank == 0) + check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col); +#endif + + // close properly the pragram + + MPI_Type_free(&Block_a); + MPI_Type_free(&Block_a_local); + MPI_Type_free(&Block_b); + + free(a_Summa); + free(b_Summa); + + + free( a ); + free( b ); + if( NB_groups > 1 ) { + free( c ); + } + free(res); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_free(&my_world); + MPI_Comm_free(&group_comm); + MPI_Comm_free(&group_line); + MPI_Comm_free(&row_comm); + MPI_Comm_free(&col_comm); + return 0; +} + + + diff --git a/examples/smpi/MM/2.5D_MM.h b/examples/smpi/MM/2.5D_MM.h new file mode 100644 index 0000000000..6a92c33f70 --- /dev/null +++ b/examples/smpi/MM/2.5D_MM.h @@ -0,0 +1,6 @@ +#include +double two_dot_five( + size_t m, size_t k, size_t n, + size_t Block_size, size_t group, size_t key, + size_t size_row, size_t size_col, size_t NB_groups ); + diff --git a/examples/smpi/MM/CMakeLists.txt b/examples/smpi/MM/CMakeLists.txt new file mode 100644 index 0000000000..a4a1c2f7a7 --- /dev/null +++ b/examples/smpi/MM/CMakeLists.txt @@ -0,0 +1,46 @@ +cmake_minimum_required(VERSION 2.6) + +if(enable_smpi) + set(CMAKE_C_COMPILER "${CMAKE_BINARY_DIR}/bin/smpicc") + + set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}") + + include_directories("${CMAKE_HOME_DIRECTORY}/include/smpi") + +add_executable(MM_mpi MM_mpi.c 2.5D_MM.c Summa.c timer.c topo.c param.c Matrix_init.c) + +### Add definitions for compile +if(NOT WIN32) + target_link_libraries(MM_mpi simgrid pthread m smpi) +else(NOT WIN32) + target_link_libraries(MM_mpi simgrid smpi) +endif(NOT WIN32) +endif(enable_smpi) + +set(tesh_files + ${tesh_files} + PARENT_SCOPE + ) +set(xml_files + ${xml_files} + PARENT_SCOPE + ) +set(examples_src + ${examples_src} + ${CMAKE_CURRENT_SOURCE_DIR}/MM_mpi.c + ${CMAKE_CURRENT_SOURCE_DIR}/2.5D_MM.c + ${CMAKE_CURRENT_SOURCE_DIR}/Summa.c + ${CMAKE_CURRENT_SOURCE_DIR}/timer.c + ${CMAKE_CURRENT_SOURCE_DIR}/topo.c + ${CMAKE_CURRENT_SOURCE_DIR}/param.c + ${CMAKE_CURRENT_SOURCE_DIR}/Matrix_init.c + PARENT_SCOPE + ) +set(bin_files + ${bin_files} + PARENT_SCOPE + ) +set(txt_files + ${txt_files} + PARENT_SCOPE + ) diff --git a/examples/smpi/MM/MM_mpi.c b/examples/smpi/MM/MM_mpi.c new file mode 100644 index 0000000000..7ab7d14371 --- /dev/null +++ b/examples/smpi/MM/MM_mpi.c @@ -0,0 +1,178 @@ +/* + * Block Matrix 
Multiplication example + * + * Authors: Quintin Jean-Noël + */ + + +#include "topo.h" +#include "param.h" +#include "Matrix_init.h" +#include "2.5D_MM.h" +#include "xbt/log.h" + +/*int sched_setaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask); + int sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask); + */ +#include +#include +#include +#include +#include + + XBT_LOG_NEW_DEFAULT_CATEGORY(MM_mpi, + "Messages specific for this msg example"); + + + +int main(int argc, char ** argv) +{ + + size_t m = 1024 , n = 1024 , k = 1024; + size_t NB_Block = 16; + size_t Block_size = k/NB_Block ; + size_t NB_groups = 1, group = 0, key = 0; + /* x index on M + y index on N + Z index on K */ + + + + int myrank; + int NB_proc; + size_t row, col, size_row, size_col; //description: vitual processor topology + row = 0; + col = 0; + + MPI_Init(&argc, &argv); + + /* Find out my identity in the default communicator */ + + MPI_Comm_rank ( MPI_COMM_WORLD, &myrank ); + MPI_Comm_size ( MPI_COMM_WORLD, &NB_proc ); + + if(NB_proc != 1) + for (size_col=NB_proc/2; NB_proc%size_col; size_col--); + else + size_col = 1; + + size_row = NB_proc/size_col; + if (size_row > size_col){ + size_col = size_row; + size_row = NB_proc/size_col; + } + + + // for the degub +#if DEBUG_MPI + size_t loop=1; + while(loop==1); +#endif + + int opt, display = 0; + char *conf_file = NULL; + optind = 1; + + //get the parameter from command line + while ((opt = getopt(argc, argv, "hdf:r:c:M:N:K:B:G:g:k:P:")) != -1) { + switch(opt) { + case 'h': + XBT_INFO( + "Usage: mxm_cblas_test [options]\n" + " -M I M size (default: %zu)\n" + " -N I N size (default: %zu)\n" + " -K I K size (default: %zu)\n" + " -B I Block size on the k dimension(default: %zu)\n" + " -G I Number of processor groups(default: %zu)\n" + " -g I group index(default: %zu)\n" + " -k I group rank(default: %zu)\n" + " -r I processor row size (default: %zu)\n" + " -c I processor col size (default: %zu)\n" + " -f {Filename} provide the file with the configuration\n" + " -d display the configuration file on the stderr\n" + " -h help\n", + m, n, k, Block_size, NB_groups, group, key, row, col); + return 0; + case 'M': + m = atoi(optarg); + break; + case 'N': + n = atoi(optarg); + break; + case 'K': + k = atoi(optarg); + break; + case 'B': + Block_size = atoi(optarg); + break; + case 'G': + NB_groups = atoi(optarg); + break; + case 'g': + group = atoi(optarg); + break; + case 'k': + key = atoi(optarg); + break; + case 'r': + size_row = atoi(optarg); + break; + case 'c': + size_col = atoi(optarg); + break; + /*case 'P': + str_mask = strdup(optarg); + break;*/ + case 'f': + conf_file = strdup(optarg); + break; + case 'd': + display = 1; + break; + } + } + if( display == 1 ){ + print_conf(MPI_COMM_WORLD, myrank,NULL,NULL); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + exit(0); + } + + char **conf; + //char ***conf_all; + if(conf_file == NULL){ + conf = get_conf(MPI_COMM_WORLD, "default_conf", -1); + }else{ + conf = get_conf(MPI_COMM_WORLD, conf_file, -1); + //conf_all = get_conf_all(conf_file); + } + if(conf == NULL){ + XBT_INFO( + "No configuration for me inside the file\n"); + }else{ + + + if(NB_groups !=1){ + /* Get my group number from the config file */ + group = (size_t)atoi(conf[4]); + } + } + + + + + + + // Defined the device if we use the GPU + //TODO explain parameters + + + two_dot_five( m, k, n, Block_size, group, key, + size_row, size_col, NB_groups); + + // close properly the pragram +end: + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + return 0; +} diff 
--git a/examples/smpi/MM/Matrix_init.c b/examples/smpi/MM/Matrix_init.c new file mode 100644 index 0000000000..9688c4ef3a --- /dev/null +++ b/examples/smpi/MM/Matrix_init.c @@ -0,0 +1,194 @@ +#include "Matrix_init.h" +#include +#include "xbt/log.h" + XBT_LOG_NEW_DEFAULT_CATEGORY(MM_init, + "Messages specific for this msg example"); +#define _unused(x) ((void)x) + + +void matrices_initialisation( double ** p_a, double ** p_b, double ** p_c, + size_t m, size_t k_a, size_t k_b, size_t n, + size_t row, size_t col) +{ + + size_t x,y,z; + size_t lda = k_a; + size_t ldb = n; + size_t ldc = n; + double *a, *b, *c; + _unused(row); + + a = malloc(sizeof(double) * m * k_a); + + if ( a == 0 ){ + perror("Error allocation Matrix A"); + exit(-1); + } + + b = malloc(sizeof(double) * k_b * n); + + if ( b == 0 ){ + perror("Error allocation Matrix B"); + exit(-1); + } + + c = malloc(sizeof(double) * m * n); + if ( c == 0 ){ + perror("Error allocation Matrix C"); + exit(-1); + } + + *p_a=a; + *p_b =b; + *p_c=c; + + // size_tialisation of the matrices + for( x=0; x 0.0000001) +#else + if ( fabs(c[x*ldc + y] - y*(size_col*m)*((size_col*m)-1)/2) > 0.0000001) +#endif + { +#ifdef SIMPLE_MATRIX + XBT_INFO( "%f\t%zu, y : %zu x : %zu \n", + c[x*ldc+y], size_row*k_a, y, x); +#else + XBT_INFO( "%f\t%zu, y : %zu x : %zu \n", + c[x*ldc+y], y*(size_col*m)*((size_col*m)-1)/2, y, x); +#endif + goto error_exit; + } + } + } + XBT_INFO("result check: ok\n"); + return; +error_exit: + XBT_INFO("result check not ok\n" + "WARNING the test could be lead to some " + "errors ( precision with double )\n"); + return; +} + + diff --git a/examples/smpi/MM/Matrix_init.h b/examples/smpi/MM/Matrix_init.h new file mode 100644 index 0000000000..a4c04d0db1 --- /dev/null +++ b/examples/smpi/MM/Matrix_init.h @@ -0,0 +1,19 @@ +#include +//#undef CYCLIC +#define CYCLIC +#undef SIMPLE_MATRIX +//#define SIMPLE_MATRIX + +void matrices_initialisation(double ** p_a, double ** p_b, double ** p_c, + size_t m, size_t k_a, size_t k_b, size_t n, + size_t row, size_t col); +void matrices_allocation(double ** p_a, double ** p_b, double ** p_c, + size_t m, size_t k_a, size_t k_b, size_t n); +void blocks_initialisation(double ** p_a_local, double ** p_b_local, + size_t m, size_t B_k, size_t n); + + +void check_result(double *c, double *a, double *b, + size_t m, size_t n, size_t k_a, size_t k_b, + size_t row, size_t col, + size_t size_row, size_t size_col); diff --git a/examples/smpi/MM/Summa.c b/examples/smpi/MM/Summa.c new file mode 100644 index 0000000000..617bfaa190 --- /dev/null +++ b/examples/smpi/MM/Summa.c @@ -0,0 +1,157 @@ +/*! 
+ * Classical Block Matrix Multiplication example + * + * Authors: Quintin Jean-Noël + */ +#include "Matrix_init.h" +#include "Summa.h" +#include "timer.h" +#include "xbt/log.h" + XBT_LOG_NEW_DEFAULT_CATEGORY(MM_Summa, + "Messages specific for this msg example"); + +inline double Summa( + double *a, double *b, double *c, + size_t lda, size_t ldb, size_t ldc, + size_t m, size_t k_a, size_t k_b, size_t n, + size_t Block_size, size_t start, size_t end, + size_t row, size_t col, size_t size_row, size_t size_col, + double *a_local, double *b_local, + MPI_Datatype Block_a, MPI_Datatype Block_a_local, + MPI_Datatype Block_b, + MPI_Comm row_comm, MPI_Comm col_comm, int subs) +{ + double *B_a , *B_b ; //matrix blocks + size_t err; + double alpha = 1, beta = 1; //C := alpha * a * b + beta * c + size_t B_proc_col, B_proc_row; // Number of bloc(row or col) on one processor + B_proc_col = k_b / Block_size; // Number of block on one processor + B_proc_row = k_a / Block_size; // Number of block on one processor + + //size_t lda = k_a, ldb = n, ldc = n; + size_t lda_local = lda; + size_t ldb_local = ldb; + + + double time, computation_time = 0, communication_time = 0; + struct timespec start_time, end_time; //time mesure + struct timespec start_time_intern, end_time_intern; //time mesure + + + + + get_time(&start_time); + + /*-------------Distributed Matrix Multiplication algorithm-----------------*/ + size_t iter; + for( iter = start; iter < end; iter++ ){ + size_t pivot_row, pivot_col, pos_a, pos_b; +#ifdef CYCLIC + // pivot row on processor layer + pivot_row = (iter % size_col); + pivot_col = (iter % size_row); + //position of the block + if(subs == 1){ + pos_a = (size_t)((iter - start) / size_row) * Block_size; + pos_b = (size_t)((iter - start) / size_col) * ldb * Block_size; + }else{ + pos_a = (size_t)(iter / size_row) * Block_size; + pos_b = (size_t)(iter / size_col) * ldb * Block_size; + } +#else + // pivot row on processor layer + pivot_row = (size_t)(iter / B_proc_col) % size_col; + pivot_col = (size_t)(iter / B_proc_row) % size_row; + //position of the block + if(subs == 1){ + pos_a = ((iter - start) % B_proc_row) * Block_size; + pos_b = ((iter - start) % B_proc_col) * ldb * Block_size; + }else{ + pos_a = (iter % B_proc_row) * Block_size; + pos_b = (iter % B_proc_col) * ldb * Block_size; + } +#endif + XBT_DEBUG( "pivot: %zu, iter: %zu, B_proc_col: %zu, " + "size_col:%zu, size_row: %zu\n", + pivot_row, iter, B_proc_row,size_col,size_row); + MPI_Barrier(row_comm); + MPI_Barrier(col_comm); + + get_time(&start_time_intern); + //Broadcast the row + if(size_row > 1){ + MPI_Datatype * Block; + if( pivot_col != col ){ + B_a = a_local; + lda_local = Block_size; + XBT_DEBUG("recieve B_a %zu,%zu \n",m , Block_size); + Block = &Block_a_local; + }else{ + B_a = a + pos_a; + lda_local = lda; + XBT_DEBUG("sent B_a %zu,%zu \n",m , Block_size); + Block = &Block_a; + } + err = MPI_Bcast(B_a, 1, *Block, pivot_col, row_comm); + if (err != MPI_SUCCESS) { + perror("Error Bcast A\n"); + return -1; + } + }else{ + B_a = a + pos_a; + XBT_DEBUG("position of B_a: %zu \n", pos_a); + } + + //Broadcast the col + if(size_col > 1){ + if( pivot_row == row ){ + B_b = b + pos_b; + XBT_DEBUG("sent B_b Block_size: %zu, pos:%zu \n", + ldb, pos_b); + }else{ + B_b = b_local; + XBT_DEBUG("recieve B_b %zu,%zu \n", Block_size,n); + } + err = MPI_Bcast(B_b, 1, Block_b, pivot_row, col_comm ); + if (err != MPI_SUCCESS) { + perror("Error Bcast B\n"); + MPI_Finalize(); + exit(-1); + } + }else{ + B_b = b + pos_b; + XBT_DEBUG("position of B_b: 
%zu \n", pos_b); + } + get_time(&end_time_intern); + communication_time += get_timediff(&start_time_intern,&end_time_intern); + + MPI_Barrier(row_comm); + MPI_Barrier(col_comm); + get_time(&start_time_intern); + XBT_DEBUG("execute Gemm number: %zu\n", iter); + //We have recieved a line of block and a colomn + // cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, + // m, n, Block_size, alpha, B_a, lda_local, B_b, ldb_local, + // beta, c, ldc ); + int i, j, k; + for(i = 0; i < m; i++) + for(j = 0; j < n; j++) + for(k = 0; k < Block_size; k++) + c[i*ldc+j] += B_a[j*lda_local+k]*B_b[k*ldb_local+j]; + + get_time(&end_time_intern); + computation_time += get_timediff(&start_time_intern,&end_time_intern); + + } + MPI_Barrier(row_comm); + MPI_Barrier(col_comm); + + get_time(&end_time); + time = get_timediff(&start_time,&end_time); + printf("communication time: %le nanoseconds, " + "computation time: %le nanoseconds\n", + communication_time, computation_time); + + + return time; +} diff --git a/examples/smpi/MM/Summa.h b/examples/smpi/MM/Summa.h new file mode 100644 index 0000000000..ef0dcccb40 --- /dev/null +++ b/examples/smpi/MM/Summa.h @@ -0,0 +1,12 @@ +#include +double Summa( + double *a, double *b, double *c, + size_t lda, size_t ldb, size_t ldc, + size_t m, size_t k_a, size_t k_b, size_t n, + size_t Block_size, size_t start, size_t end, + size_t row, size_t col, size_t size_row, size_t size_col, + double *a_local, double *b_local, + MPI_Datatype Block_a, MPI_Datatype Block_a_local, + MPI_Datatype Block_b, + MPI_Comm row_comm, MPI_Comm col_comm, int subs); + diff --git a/examples/smpi/MM/command_exemple b/examples/smpi/MM/command_exemple new file mode 100644 index 0000000000..5c524d80df --- /dev/null +++ b/examples/smpi/MM/command_exemple @@ -0,0 +1 @@ +../../../bin/smpirun --cfg=smpi/running_power:2.1175E9 -np 2 -platform ../../platforms/cluster.xml -hostfile host ./MM_mpi diff --git a/examples/smpi/MM/host b/examples/smpi/MM/host new file mode 100644 index 0000000000..f47998485d --- /dev/null +++ b/examples/smpi/MM/host @@ -0,0 +1,4 @@ +c-1.me +c-2.me +c-4.me +c-6.me diff --git a/examples/smpi/MM/param.c b/examples/smpi/MM/param.c new file mode 100644 index 0000000000..3cab7f9056 --- /dev/null +++ b/examples/smpi/MM/param.c @@ -0,0 +1,193 @@ +/*! 
+ * get the parameter specific to the process from a file + * + * + * Authors: Quintin Jean-Noël + */ + +#include "topo.h" +#include "param.h" +#include +#include +#include +#include +#include "xbt/log.h" +XBT_LOG_NEW_DEFAULT_CATEGORY(MM_param, + "Messages specific for this msg example"); + +int match(const char *s, char p) +{ + int c = 0; + while (*s != '\0') { + if (strncmp(s++, &p, 1)) continue; + c++; + } + return c; +} + +char** get_list_param(const char* input){ + if(input==NULL) return NULL; + char *line = strdup(input); + int size = match(line, ' '); + char **list_param = malloc(size*sizeof(char*)); + char *pch = strtok (line," \t\n"); + int i = 0; + while (pch != NULL) + { + if(strcmp(pch,"") != 0 ) { + list_param[i] = pch; + i++; + } + pch = strtok (NULL, " \t\n"); + } + return list_param; +} + +char** get_conf(MPI_Comm comm, char * filename, int mynoderank) +{ + if(filename == NULL) return NULL; + if(mynoderank == -1){ + MPI_Comm comm_node; + split_comm_intra_node(comm, &comm_node, -1); + MPI_Comm_rank ( comm_node, &mynoderank ); + MPI_Comm_free(&comm_node); + } + char name[MPI_MAX_PROCESSOR_NAME]; + int len; + MPI_Get_processor_name(name, &len); + FILE* conf; + conf = fopen(filename, "r"); + if (conf == NULL) { + XBT_DEBUG( + "Try to open the configuration file %s\n", filename); + perror("fopen"); + return NULL; + } + char *machine_name = NULL; + size_t number = 0; + int err = 1; + int index = -1; + + + /* Try to find the line correponding to this processor */ + while((err = getdelim(&machine_name, &number, ' ', conf)) != -1) { + while(err == 1){ + err = getdelim(&machine_name, &number, ' ', conf); + } + if(err == -1) break; + XBT_DEBUG( + "read: %s cmp to %s\n", machine_name, name); + /* the first element is in machine_name + it's normally a processor name */ + /* remove the delimiter before doing the comparison*/ + machine_name[strlen(machine_name)-1] = 0; + + if(strcmp(machine_name,name) == 0){ + /* the machine name match */ + + /* try to get for which process with the index*/ + char* char_index=NULL; + err = getdelim(&char_index, &number, ' ', conf); + while(err == 1){ + err = getdelim(&char_index, &number, ' ', conf); + } + if(err == -1) break; + + index=atoi(char_index); + XBT_DEBUG( + "read: %d cmp to %d\n", + index, mynoderank); + + if(index == mynoderank){ + /* we have found the good line + * we rebuild the line to get every information*/ + char* line = NULL; + number = 0; + getline(&line,&number,conf); + char* line1 = NULL; + asprintf(&line1,"%s %s %s",name,char_index,line); + return get_list_param(line1); + } + } + + /*prepare for the next line*/ + free(machine_name); + machine_name = NULL; + number = 0; + err = getline(&machine_name, &number, conf); + if (err >= 1) { + free(machine_name); + machine_name = NULL; + number = 0; + } + + } + XBT_DEBUG( + "No configuration for %s %d\n", name, mynoderank ); + return NULL; +} + + +char*** get_conf_all(char * filename, int * nb_process){ + if(filename == NULL) return NULL; + + char *** all_conf = NULL; + FILE* conf; + int nb_line = 0; + char *line = NULL; + size_t linecap = 0; + ssize_t linelen; + + conf = fopen(filename, "r"); + if (conf == NULL) { + XBT_DEBUG( + "Try to open the configuration file %s\n", filename); + perror("fopen"); + return NULL; + } + + while ((linelen = getline(&line, &linecap, conf)) > 0) + nb_line++; + fclose(conf); + conf = fopen(filename, "r"); + + all_conf = malloc(sizeof(char**) * nb_line); + /* Try to find the line correponding to this processor */ + nb_line = 0; + while ((linelen = 
getline(&line, &linecap, conf)) > 0){ + if (strcmp(line,"") == 0) continue; //Skip blank line + if (line[0] == '#') continue; //Skip comment line + all_conf[nb_line] = get_list_param(line); + nb_line++; + } + if(nb_process != NULL) *nb_process = nb_line; + return all_conf; +} + + +void print_conf(MPI_Comm comm, int rank, FILE* file, char * default_options){ + char name[MPI_MAX_PROCESSOR_NAME]; + int len; + MPI_Get_processor_name(name, &len); + if(file == NULL) file = stdout; + if(default_options == NULL) default_options = ""; + + MPI_Comm comm_node; + split_comm_intra_node(comm, &comm_node, 0); + + char ** names = get_names(comm); + int* index = get_index( comm, comm_node); + MPI_Comm_free(&comm_node); + int size; + MPI_Comm_size(comm, &size); + int i=0; + if(rank == 0){ + fprintf(file, "#processor_name index USER ARGS (like the cpu binding ...)\n"); + for(i = 0; i < size; i++){ + fprintf(file, "%s %d %s\n", names[i],index[i],default_options); + } + } + free(names); + free(index); + return; +} diff --git a/examples/smpi/MM/param.h b/examples/smpi/MM/param.h new file mode 100644 index 0000000000..fa29681ec8 --- /dev/null +++ b/examples/smpi/MM/param.h @@ -0,0 +1,35 @@ +#ifndef param_H_ +#define param_H_ + +#include + +/*! + * \page param Provide specific parameters to some processes + * List of functions: + * - get_conf - reads a file and returns the specific parameters for the process + * - print_conf - prints the file for the platform used for this execution + * + * here an examples of file: + * + * \include utils/param_template_file.txt + * + * each process will get the list of arguments specific to its. + * + * for example, the process 3th on the node1 will receive a pointer + * on an array of char*: + * + * + * "node1" "2" "node1_arg21" "node1_arg22" "node1_arg2" + */ + +/*! reads a file and returns the specific parameters for the process */ +char** get_conf(MPI_Comm comm, char * filename, int mynoderank); + +/*! reads a file and returns the parameters of every processes */ +char*** get_conf_all(char * filename, int * nb_process); + + +/*! 
+  prints the file for the platform used for this execution */
+void print_conf(MPI_Comm comm, int rank, FILE* file, char * default_options);
+
+#endif /*param_H_*/
diff --git a/examples/smpi/MM/param_template_file.txt b/examples/smpi/MM/param_template_file.txt
new file mode 100644
index 0000000000..d983d3ea22
--- /dev/null
+++ b/examples/smpi/MM/param_template_file.txt
@@ -0,0 +1,9 @@
+node1 0 node1_arg01 node1_arg02 node1_arg03
+node1 1 node1_arg11 node1_arg12 node1_arg13
+node1 2 node1_arg21 node1_arg22 node1_arg23
+node1 3 node1_arg31 node1_arg32 node1_arg33
+node2 0 node2_arg01 node2_arg02 node2_arg03
+node2 1 node2_arg11 node2_arg12 node2_arg13
+node3 0 node3_arg01 node3_arg02 node3_arg03
+node3 1 node3_arg11 node3_arg12 node3_arg13
+node3 2 node3_arg21 node3_arg22 node3_arg23
diff --git a/examples/smpi/MM/timer.c b/examples/smpi/MM/timer.c
new file mode 100644
index 0000000000..4b2e0adcd0
--- /dev/null
+++ b/examples/smpi/MM/timer.c
@@ -0,0 +1,35 @@
+#include "timer.h"
+#include <time.h>
+#include <math.h>
+#include <mpi.h>
+#include "xbt/log.h"
+ XBT_LOG_NEW_DEFAULT_CATEGORY(MM_timer,
+                             "Messages specific for this msg example");
+
+/* this could be specific for some processors
+ * the default solution seems to be accurate enough
+#define CLOCK_TIMER CLOCK_MONOTONIC_RAW
+ */
+
+inline double get_microsecond(struct timespec *res){
+  return (res->tv_sec*1000000 + res->tv_nsec/1000.0);
+}
+inline double get_nanosecond(struct timespec *res){
+  return (res->tv_sec*1000000000 + res->tv_nsec);
+}
+inline double get_second(struct timespec *res){
+  return (res->tv_sec + res->tv_nsec/1000000000.0);
+}
+
+inline int get_time(struct timespec *tp){
+  double time = MPI_Wtime();
+  time_t value = (time_t)floor(time);
+  time -= (double) value;
+  time = time * 1000000000;
+  tp->tv_nsec = (long) time;
+  tp->tv_sec = value;
+  return 0;
+}
+
+double get_timediff(struct timespec *start, struct timespec *end){
+  return (double)(-start->tv_sec - ((double)start->tv_nsec)/1000000000
+                  + end->tv_sec + ((double)end->tv_nsec)/1000000000);
+}
diff --git a/examples/smpi/MM/timer.h b/examples/smpi/MM/timer.h
new file mode 100644
index 0000000000..5a59a24ea4
--- /dev/null
+++ b/examples/smpi/MM/timer.h
@@ -0,0 +1,16 @@
+#ifndef timer_H_
+#define timer_H_
+/*!
+ * \defgroup timer.h
+ * here we provide a generic interface to time some parts of the code
+ */
+#include <time.h>
+
+inline double get_microsecond(struct timespec *res);
+inline double get_nanosecond(struct timespec *res);
+
+
+
+int get_time(struct timespec *tp);
+double get_timediff(struct timespec *start, struct timespec * end);
+#endif /*timer_H_*/
diff --git a/examples/smpi/MM/topo.c b/examples/smpi/MM/topo.c
new file mode 100644
index 0000000000..e26a53b448
--- /dev/null
+++ b/examples/smpi/MM/topo.c
@@ -0,0 +1,67 @@
+/*!
+ * get the information of which threads are on the same node
+ *
+ *
+ * Authors: Quintin Jean-Noël
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <mpi.h>
+#include "topo.h"
+
+int split_comm_intra_node(MPI_Comm comm, MPI_Comm* comm_intra, int key) {
+  // collect processor names
+  char name[MPI_MAX_PROCESSOR_NAME];
+  int len;
+  int size;
+  char** names = get_names(comm);
+  int color = -1;
+  int i = 0;
+
+  MPI_Get_processor_name(name, &len);
+  MPI_Comm_size(comm, &size);
+  while (i < size){
+    if (strcmp(name, names[i]) == 0) {
+      break;
+    }
+    i++;
+  }
+  color = i;
+  free(names);
+  // split the communicator
+  return MPI_Comm_split(comm, color, key, comm_intra);
+}
+
+char** get_names(MPI_Comm comm){
+  char name[MPI_MAX_PROCESSOR_NAME];
+  int len;
+  int size;
+  int i;
+  char** friendly_names;
+  char* names;
+
+  MPI_Get_processor_name(name, &len);
+  MPI_Comm_size(comm, &size);
+  friendly_names = malloc(sizeof(char*) * size);
+  names = malloc(sizeof(char) * MPI_MAX_PROCESSOR_NAME * size);
+
+  MPI_Allgather(name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, names,
+                MPI_MAX_PROCESSOR_NAME, MPI_CHAR, comm);
+
+  for( i = 0; i < size; i++) friendly_names[i] = &names[MPI_MAX_PROCESSOR_NAME * i];
+  return friendly_names;
+}
+
+int* get_index(MPI_Comm comm, MPI_Comm comm_intra){
+  int rank = 0;
+  int size;
+  int* index;
+
+  MPI_Comm_rank(comm_intra, &rank);
+  MPI_Comm_size(comm, &size);
+  index = (int*)malloc(sizeof(int) * size);
+  MPI_Allgather(&rank, 1, MPI_INT, index, 1, MPI_INT, comm);
+  return index;
+}
+
diff --git a/examples/smpi/MM/topo.h b/examples/smpi/MM/topo.h
new file mode 100644
index 0000000000..06e29e5d06
--- /dev/null
+++ b/examples/smpi/MM/topo.h
@@ -0,0 +1,16 @@
+#ifndef topo_H_
+#define topo_H_
+#include <mpi.h>
+
+/*!
+ * \defgroup topo.h
+ * file related to the platform description.
+ *
+ * provides a communicator between the processes on the same node*/
+int split_comm_intra_node(MPI_Comm comm, MPI_Comm* comm_intra, int key);
+
+
+int* get_index(MPI_Comm comm, MPI_Comm comm_intra);
+char** get_names(MPI_Comm comm);
+
+#endif /*topo_H_*/
--
2.20.1