From 06bf176b160c61ded9d6d7fc28936b1a8c928b82 Mon Sep 17 00:00:00 2001 From: degomme Date: Tue, 26 Feb 2019 11:59:36 +0100 Subject: [PATCH] Add simple gemm example for SMPI with sampling macros. Adapted from the proxy app from Codevault. This could be a good basis for the tuto --- examples/smpi/CMakeLists.txt | 6 +- examples/smpi/gemm/gemm.c | 152 +++++++++++++++++++++++++++++++++++ examples/smpi/gemm/gemm.tesh | 17 ++++ 3 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 examples/smpi/gemm/gemm.c create mode 100644 examples/smpi/gemm/gemm.tesh diff --git a/examples/smpi/CMakeLists.txt b/examples/smpi/CMakeLists.txt index c4667df421..f528907faa 100644 --- a/examples/smpi/CMakeLists.txt +++ b/examples/smpi/CMakeLists.txt @@ -5,7 +5,7 @@ if(enable_smpi) file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mc/") - foreach(x replay ampi_test trace trace_simple trace_call_location energy) + foreach(x replay ampi_test trace trace_simple trace_call_location energy gemm) add_executable (smpi_${x} EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}) target_link_libraries(smpi_${x} simgrid) set_target_properties(smpi_${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x}) @@ -28,7 +28,7 @@ endif() foreach(x ampi_test replay) set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.cpp) endforeach() -foreach(x trace trace_simple trace_call_location energy) +foreach(x trace trace_simple trace_call_location energy gemm) set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.c) endforeach() foreach(x bugged1 bugged2 bugged1_liveness only_send_deterministic mutual_exclusion non_termination1 @@ -39,6 +39,7 @@ endforeach() set(examples_src ${examples_src} PARENT_SCOPE) set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/energy/energy.tesh ${CMAKE_CURRENT_SOURCE_DIR}/trace/trace.tesh + ${CMAKE_CURRENT_SOURCE_DIR}/gemm/gemm.tesh ${CMAKE_CURRENT_SOURCE_DIR}/trace_simple/trace_simple.tesh 
${CMAKE_CURRENT_SOURCE_DIR}/trace_call_location/trace_call_location.tesh ${CMAKE_CURRENT_SOURCE_DIR}/ampi_test/ampi_test.tesh @@ -78,6 +79,7 @@ if(enable_smpi) ADD_TESH(smpi-tracing-call-location --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location ${CMAKE_HOME_DIRECTORY}/examples/smpi/trace_call_location/trace_call_location.tesh) ADD_TESH(smpi-replay --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay.tesh) ADD_TESH(smpi-replay-override-replayer --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay-override-replayer.tesh) + ADD_TESH(smpi-gemm --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/gemm --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi/gemm --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi/gemm ${CMAKE_HOME_DIRECTORY}/examples/smpi/gemm/gemm.tesh) ADD_TESH_FACTORIES(smpi-energy "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/energy --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi/energy --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/bin --cd ${CMAKE_BINARY_DIR}/examples/smpi/energy ${CMAKE_HOME_DIRECTORY}/examples/smpi/energy/energy.tesh) ADD_TESH(smpi-ampi --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/ampi_test/ampi_test.tesh) diff --git a/examples/smpi/gemm/gemm.c 
b/examples/smpi/gemm/gemm.c
new file mode 100644
index 0000000000..b6baeae544
--- /dev/null
+++ b/examples/smpi/gemm/gemm.c
@@ -0,0 +1,168 @@
+/*==================================================================================================*/
+/*# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.*/
+/*# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).*/
+/*#*/
+/*# Author(s):*/
+/*# Valeriu Codreanu */
+/*#*/
+/*# ==================================================================================================*/
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mpi.h"
+
+void multiply(float* a, float* b, float* c, int istart, int iend, int size);
+void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size);
+
+/* Accumulate rows [istart, iend] (both inclusive) of the product A x B into C.
+ * a, b and c are size x size matrices stored row by row in flat arrays. */
+void multiply(float* a, float* b, float* c, int istart, int iend, int size)
+{
+  for (int i = istart; i <= iend; ++i) {
+    for (int j = 0; j < size; ++j) {
+      for (int k = 0; k < size; ++k) {
+        c[i*size+j] += a[i*size+k] * b[k*size+j];
+      }
+    }
+  }
+}
+
+/* Same kernel as multiply(), but the loop over rows is driven by the SMPI_SAMPLE_GLOBAL macro
+ * instead of a plain for loop. The last two macro arguments (10 and 0.005) are sampling parameters;
+ * see the SMPI documentation for their exact meaning and for which iterations really get executed. */
+void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size)
+{
+  SMPI_SAMPLE_GLOBAL (int i = istart, i <= iend, ++i, 10, 0.005){
+    for (int j = 0; j < size; ++j) {
+      for (int k = 0; k < size; ++k) {
+        c[i*size+j] += a[i*size+k] * b[k*size+j];
+      }
+    }
+  }
+}
+
+/* Usage: gemm size [native|sampling]
+ * Rank 0 builds the matrices and broadcasts them, every rank multiplies its own band of rows,
+ * the bands are gathered on rank 0, which finally prints the elapsed time and the GFlop/s rate. */
+int main(int argc, char* argv[])
+{
+  int rank, nproc;
+  int istart, iend;
+  double start, end;
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  if(argc<2){
+    if (rank == 0)
+      printf("Usage : gemm size \"native/sampling\"\n");
+    MPI_Finalize(); /* leave MPI cleanly instead of calling exit() in the middle of the run */
+    return -1;
+  }
+
+  int size=0;
+  int read = sscanf(argv[1], "%d", &size);
+  /* sscanf() returns EOF (not 0) on empty input, and size*size must fit
+   * the int element counts handed to MPI below. */
+  if(read != 1 || size <= 0 || size > INT_MAX / size){
+    if (rank == 0)
+      printf("Invalid argument %s\n", argv[1]);
+    MPI_Finalize();
+    return -1;
+  }
+  if (rank == 0)
+    printf("Matrix Size : %dx%d\n",size,size);
+
+  /* argv[2] is optional: anything but "sampling" (including no argument
+   * at all) selects the native kernel. */
+  int sampling = (argc > 2 && strcmp(argv[2], "sampling") == 0);
+
+  size_t nelems = (size_t)size * (size_t)size;
+  float *a = (float*)malloc(sizeof(float)*nelems);
+  float *b = (float*)malloc(sizeof(float)*nelems);
+  float *c = (float*)malloc(sizeof(float)*nelems);
+  if (a == NULL || b == NULL || c == NULL) {
+    fprintf(stderr, "Rank %d: cannot allocate three %dx%d matrices\n", rank, size, size);
+    MPI_Abort(MPI_COMM_WORLD, 1);
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  start = MPI_Wtime();
+
+  if (rank == 0) {
+    // Initialize buffers.
+    for (int i = 0; i < size; ++i) {
+      for (int j = 0; j < size; ++j) {
+        a[i*size+j] = (float)i + j;
+        b[i*size+j] = (float)i - j;
+        c[i*size+j] = 0.0f;
+      }
+    }
+  }
+
+  // Broadcast matrices to all workers.
+  MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
+  MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
+  MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
+
+  // Partition work by i-for-loop: each rank gets rows_per_rank consecutive rows.
+  int rows_per_rank = size / nproc;
+  istart = rows_per_rank * rank;
+  iend = rows_per_rank * (rank + 1) - 1;
+
+  // Compute matrix multiplication in [istart,iend]
+  // of i-for-loop.
+  // C <- C + A x B
+  if (sampling){
+    if (rank == 0)
+      printf ("Sampling mode\n");
+    multiply_sampled(a, b, c, istart, iend, size);
+  }else{
+    if (rank == 0)
+      printf ("Native mode\n");
+    multiply(a, b, c, istart, iend, size);
+  }
+
+  // Gather computed results. A band is rows_per_rank full rows, so it starts at element
+  // istart*size and holds rows_per_rank*size floats. Rank 0 already has its own band in
+  // place inside c, hence MPI_IN_PLACE there (send and receive buffers must not overlap).
+  int band_len = rows_per_rank * size;
+  void* band = (rank == 0) ? MPI_IN_PLACE : (void*)(c + (size_t)istart * size);
+  MPI_Gather(band, band_len, MPI_FLOAT, c, band_len, MPI_FLOAT, 0, MPI_COMM_WORLD);
+
+  // Compute remaining multiplications on rank 0
+  // when size % nproc > 0.
+  if (rank == 0 && size % nproc > 0) {
+    if (sampling)
+      multiply_sampled(a, b, c, rows_per_rank*nproc, size-1, size);
+    else
+      multiply(a, b, c, rows_per_rank*nproc, size-1, size);
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  end = MPI_Wtime();
+
+  MPI_Finalize();
+  free(a);
+  free(b);
+  free(c);
+  if (rank == 0) { /* use time on master node */
+    // Compute and print the performance. MPI_Wtime() reports seconds, so the elapsed
+    // time is used as is to turn the operation count into GFlop/s.
+    float sec_per_matrix_mul = (float)(end-start);
+    double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
+    double giga_flops = 0.0;
+    if (sec_per_matrix_mul > 0.0f)
+      giga_flops = (flops_per_matrix_mul * 1.0e-9) / sec_per_matrix_mul;
+    printf(
+      "Performance= %.2f GFlop/s, Time= %.3f sec, Size= %.0f Ops\n",
+      giga_flops,
+      sec_per_matrix_mul,
+      flops_per_matrix_mul);
+  }
+
+  return 0;
+}
diff --git a/examples/smpi/gemm/gemm.tesh b/examples/smpi/gemm/gemm.tesh
new file mode 100644
index 0000000000..57466d206b
--- /dev/null
+++ b/examples/smpi/gemm/gemm.tesh
@@ -0,0 +1,17 @@
+# Run the SMPI gemm example twice on the same simulated platform:
+# first with the plain kernel ("native"), then with the SMPI sampling macro ("sampling").
+# Both runs are expected to report the same simulated time and performance.
+
+p Test the SMPI gemm example in native and sampling modes
+
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ${srcdir:=.}/../hostfile -platform ${platfdir:=.}/small_platform.xml --cfg=path:${srcdir:=.}/../msg --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/host-speed:1 -np 8 ${bindir:=.}/smpi_gemm 1000 native
+> You requested to use 8 ranks, but there is only 5 processes in your hostfile...
+> Matrix Size : 1000x1000
+> Native mode
+> Performance= 0.23 GFlop/s, Time= 8.795 sec, Size= 2000000000 Ops
+
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ${srcdir:=.}/../hostfile -platform ${platfdir:=.}/small_platform.xml --cfg=path:${srcdir:=.}/../msg --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/host-speed:1 -np 8 ${bindir:=.}/smpi_gemm 1000 sampling
+> You requested to use 8 ranks, but there is only 5 processes in your hostfile...
+> Matrix Size : 1000x1000
+> Sampling mode
+> Performance= 0.23 GFlop/s, Time= 8.795 sec, Size= 2000000000 Ops
-- 
2.20.1