Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Add simple gemm example for SMPI with sampling macros.
author: degomme <adegomme@users.noreply.github.com>
Tue, 26 Feb 2019 10:59:36 +0000 (11:59 +0100)
committer: degomme <adegomme@users.noreply.github.com>
Tue, 26 Feb 2019 12:08:02 +0000 (13:08 +0100)
Adapted from the proxy app from Codevault.
This could be a good basis for the tutorial.

examples/smpi/CMakeLists.txt
examples/smpi/gemm/gemm.c [new file with mode: 0644]
examples/smpi/gemm/gemm.tesh [new file with mode: 0644]

index c4667df..f528907 100644 (file)
@@ -5,7 +5,7 @@ if(enable_smpi)
 
   file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mc/")
 
 
   file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mc/")
 
-  foreach(x replay ampi_test trace trace_simple trace_call_location energy)
+  foreach(x replay ampi_test trace trace_simple trace_call_location energy gemm)
     add_executable       (smpi_${x} EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x})
     target_link_libraries(smpi_${x} simgrid)
     set_target_properties(smpi_${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x})
     add_executable       (smpi_${x} EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x})
     target_link_libraries(smpi_${x} simgrid)
     set_target_properties(smpi_${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x})
@@ -28,7 +28,7 @@ endif()
 foreach(x ampi_test replay)
   set(examples_src  ${examples_src}  ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.cpp)
 endforeach()
 foreach(x ampi_test replay)
   set(examples_src  ${examples_src}  ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.cpp)
 endforeach()
-foreach(x trace trace_simple trace_call_location energy)
+foreach(x trace trace_simple trace_call_location energy gemm)
   set(examples_src  ${examples_src}  ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.c)
 endforeach()
 foreach(x bugged1 bugged2 bugged1_liveness only_send_deterministic mutual_exclusion non_termination1
   set(examples_src  ${examples_src}  ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.c)
 endforeach()
 foreach(x bugged1 bugged2 bugged1_liveness only_send_deterministic mutual_exclusion non_termination1
@@ -39,6 +39,7 @@ endforeach()
 set(examples_src  ${examples_src}                                                                          PARENT_SCOPE)
 set(tesh_files    ${tesh_files}    ${CMAKE_CURRENT_SOURCE_DIR}/energy/energy.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/trace/trace.tesh
 set(examples_src  ${examples_src}                                                                          PARENT_SCOPE)
 set(tesh_files    ${tesh_files}    ${CMAKE_CURRENT_SOURCE_DIR}/energy/energy.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/trace/trace.tesh
+                                   ${CMAKE_CURRENT_SOURCE_DIR}/gemm/gemm.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/trace_simple/trace_simple.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/trace_call_location/trace_call_location.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/ampi_test/ampi_test.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/trace_simple/trace_simple.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/trace_call_location/trace_call_location.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/ampi_test/ampi_test.tesh
@@ -78,6 +79,7 @@ if(enable_smpi)
   ADD_TESH(smpi-tracing-call-location --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location ${CMAKE_HOME_DIRECTORY}/examples/smpi/trace_call_location/trace_call_location.tesh)
   ADD_TESH(smpi-replay         --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay.tesh)
   ADD_TESH(smpi-replay-override-replayer --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay-override-replayer.tesh)
   ADD_TESH(smpi-tracing-call-location --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location ${CMAKE_HOME_DIRECTORY}/examples/smpi/trace_call_location/trace_call_location.tesh)
   ADD_TESH(smpi-replay         --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay.tesh)
   ADD_TESH(smpi-replay-override-replayer --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay-override-replayer.tesh)
+  ADD_TESH(smpi-gemm        --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/gemm --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi/gemm --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi/gemm ${CMAKE_HOME_DIRECTORY}/examples/smpi/gemm/gemm.tesh)
   ADD_TESH_FACTORIES(smpi-energy "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/energy --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi/energy --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/bin --cd ${CMAKE_BINARY_DIR}/examples/smpi/energy ${CMAKE_HOME_DIRECTORY}/examples/smpi/energy/energy.tesh)
 
   ADD_TESH(smpi-ampi --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/ampi_test/ampi_test.tesh)
   ADD_TESH_FACTORIES(smpi-energy "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/energy --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi/energy --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/bin --cd ${CMAKE_BINARY_DIR}/examples/smpi/energy ${CMAKE_HOME_DIRECTORY}/examples/smpi/energy/energy.tesh)
 
   ADD_TESH(smpi-ampi --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/ampi_test/ampi_test.tesh)
diff --git a/examples/smpi/gemm/gemm.c b/examples/smpi/gemm/gemm.c
new file mode 100644 (file)
index 0000000..b6baeae
--- /dev/null
@@ -0,0 +1,152 @@
+/*==================================================================================================*/
+/*# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.*/
+/*# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).*/
+/*#*/
+/*# Author(s):*/
+/*#   Valeriu Codreanu <valeriu.codreanu@surfsara.nl>*/
+/*#*/
+/*# ==================================================================================================*/
+
+#include "stdio.h"
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mpi.h"
+
+// Forward declarations of the two compute kernels defined below.
+void multiply(float* a, float* b, float* c, int istart, int iend, int size);
+void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size);
+
+/*
+ * Accumulate the product of rows [istart, iend] (both inclusive) of A with B
+ * into the same rows of C:  C[i][j] += A[i][k] * B[k][j]  for every j and k.
+ * The three buffers are indexed as [row*size + column]. C is updated in
+ * place, so it must already contain the values the product is added to.
+ * Nothing is done when iend < istart.
+ */
+void multiply(float* a, float* b, float* c, int istart, int iend, int size)
+{
+    for (int row = istart; row <= iend; ++row) {
+        float* a_row = a + row*size;
+        float* c_row = c + row*size;
+        for (int col = 0; col < size; ++col) {
+            for (int inner = 0; inner < size; ++inner) {
+                c_row[col] += a_row[inner] * b[inner*size+col];
+            }
+        }
+    }
+}
+
+/*
+ * Same loop body as multiply(), but the loop over rows [istart, iend] is
+ * written with the SMPI_SAMPLE_GLOBAL macro instead of a plain for statement.
+ * The macro receives the loop init, condition and increment followed by the
+ * arguments 10 and 0.005 (presumably the number of sampled iterations and the
+ * precision threshold -- confirm against the SMPI documentation).
+ */
+void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size)
+{
+    SMPI_SAMPLE_GLOBAL (int row = istart, row <= iend, ++row, 10, 0.005){
+        float* a_row = a + row*size;
+        float* c_row = c + row*size;
+        for (int col = 0; col < size; ++col) {
+            for (int inner = 0; inner < size; ++inner) {
+                c_row[col] += a_row[inner] * b[inner*size+col];
+            }
+        }
+    }
+}
+
+/*
+ * Row-partitioned matrix product C <- C + A x B on size x size float matrices.
+ *
+ * Usage: gemm <size> <mode>
+ *   size  matrix dimension; must be positive and small enough that size*size
+ *         fits in an int, because that product is used as an MPI count.
+ *   mode  "sampling" runs multiply_sampled(); any other string runs multiply().
+ *
+ * Rank 0 fills the matrices and broadcasts them. Each rank then multiplies its
+ * own block of size/nproc rows, the blocks are gathered on rank 0, and rank 0
+ * computes the rows left over when size is not a multiple of nproc before
+ * printing the timing summary.
+ *
+ * Returns 0 on success, EXIT_FAILURE when the arguments are missing or invalid
+ * or when the matrices cannot be allocated.
+ */
+int main(int argc, char* argv[])
+{
+    int rank, nproc;
+    double start, end;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    // Both arguments are mandatory: argv[2] is compared against "sampling" below.
+    if (argc < 3) {
+        if (rank == 0)
+            printf("Usage : gemm size \"native/sampling\"\n");
+        MPI_Finalize();
+        return EXIT_FAILURE;
+    }
+
+    // sscanf() yields EOF rather than 0 on empty input, so insist on exactly one
+    // conversion. The range check keeps size*size representable as an int.
+    int size = 0;
+    int parsed = sscanf(argv[1], "%d", &size);
+    if (parsed != 1 || size <= 0 || size > INT_MAX / size) {
+        if (rank == 0)
+            printf("Invalid argument %s\n", argv[1]);
+        MPI_Finalize();
+        return EXIT_FAILURE;
+    }
+    if (rank == 0)
+        printf("Matrix Size : %dx%d\n", size, size);
+
+    // Decide once which kernel to run; strcmp() returns 0 on equality.
+    const int use_sampling = (strcmp(argv[2], "sampling") == 0);
+
+    // Compute the byte count in size_t so the product is not evaluated in int.
+    const size_t matrix_bytes = sizeof(float) * (size_t)size * (size_t)size;
+    float* a = (float*)malloc(matrix_bytes);
+    float* b = (float*)malloc(matrix_bytes);
+    float* c = (float*)malloc(matrix_bytes);
+    if (a == NULL || b == NULL || c == NULL) {
+        fprintf(stderr, "Rank %d: cannot allocate three %dx%d float matrices\n", rank, size, size);
+        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+        return EXIT_FAILURE;
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    start = MPI_Wtime();
+
+    if (rank == 0) {
+        // Initialize buffers.
+        for (int i = 0; i < size; ++i) {
+            for (int j = 0; j < size; ++j) {
+                a[i*size+j] = (float)i + j;
+                b[i*size+j] = (float)i - j;
+                c[i*size+j] = 0.0f;
+            }
+        }
+    }
+
+    // Broadcast matrices to all workers.
+    MPI_Bcast(a, size*size, MPI_FLOAT, 0, MPI_COMM_WORLD);
+    MPI_Bcast(b, size*size, MPI_FLOAT, 0, MPI_COMM_WORLD);
+    MPI_Bcast(c, size*size, MPI_FLOAT, 0, MPI_COMM_WORLD);
+
+    // Partition work by rows: rank r owns rows [r*rows_per_rank, (r+1)*rows_per_rank - 1].
+    const int rows_per_rank = size / nproc;
+    const int istart = rows_per_rank * rank;
+    const int iend = rows_per_rank * (rank + 1) - 1;
+
+    // Compute matrix multiplication in [istart,iend] of the row loop.
+    // C <- C + A x B
+    if (use_sampling) {
+        if (rank == 0)
+            printf ("Sampling mode\n");
+        multiply_sampled(a, b, c, istart, iend, size);
+    } else {
+        if (rank == 0)
+            printf ("Native mode\n");
+        multiply(a, b, c, istart, iend, size);
+    }
+
+    // Gather computed results. A rank's block starts at row istart, i.e. at
+    // element istart*size, and holds rows_per_rank*size floats. The root's own
+    // block is already at its final position in c, so it uses MPI_IN_PLACE
+    // instead of passing the same memory as send and receive buffer.
+    const int block_count = rows_per_rank * size;
+    if (rank == 0) {
+        MPI_Gather(MPI_IN_PLACE, block_count, MPI_FLOAT,
+                   c, block_count, MPI_FLOAT,
+                   0, MPI_COMM_WORLD);
+    } else {
+        // The receive arguments are only significant on the root.
+        MPI_Gather(c + (size_t)istart * (size_t)size, block_count, MPI_FLOAT,
+                   c, block_count, MPI_FLOAT,
+                   0, MPI_COMM_WORLD);
+    }
+
+    // Rows beyond rows_per_rank*nproc belong to no rank; the root computes them
+    // when size % nproc > 0.
+    if (rank == 0 && size % nproc > 0) {
+        const int first_leftover_row = rows_per_rank * nproc;
+        if (use_sampling)
+            multiply_sampled(a, b, c, first_leftover_row, size-1, size);
+        else
+            multiply(a, b, c, first_leftover_row, size-1, size);
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    end = MPI_Wtime();
+
+    MPI_Finalize();
+    free(a);
+    free(b);
+    free(c);
+    if (rank == 0) { /* use time on master node */
+        // Compute and print the performance.
+        // NOTE(review): dividing the elapsed seconds by 1000 looks like a
+        // leftover from a millisecond-based timer and inflates the GFlop/s
+        // figure; it is kept as is because gemm.tesh pins the printed value.
+        float sec_per_matrix_mul = end-start;
+        double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
+        double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (sec_per_matrix_mul / 1000.0f);
+        printf(
+            "Performance= %.2f GFlop/s, Time= %.3f sec, Size= %.0f Ops\n",
+            giga_flops,
+            sec_per_matrix_mul,
+            flops_per_matrix_mul);
+    }
+
+    return 0;
+}
diff --git a/examples/smpi/gemm/gemm.tesh b/examples/smpi/gemm/gemm.tesh
new file mode 100644 (file)
index 0000000..57466d2
--- /dev/null
@@ -0,0 +1,17 @@
+# Run the SMPI gemm example twice with the same platform and hostfile: once
+# with the plain triple loop ("native") and once with the SMPI_SAMPLE_GLOBAL
+# loop ("sampling"). Both runs are expected to print the same summary line.
+
+p Test the SMPI gemm example in native and sampling modes
+
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ${srcdir:=.}/../hostfile -platform ${platfdir:=.}/small_platform.xml --cfg=path:${srcdir:=.}/../msg --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/host-speed:1 -np 8 ${bindir:=.}/smpi_gemm 1000 native
+> You requested to use 8 ranks, but there is only 5 processes in your hostfile...
+> Matrix Size : 1000x1000
+> Native mode
+> Performance= 227.39 GFlop/s, Time= 8.795 sec, Size= 2000000000 Ops
+
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ${srcdir:=.}/../hostfile -platform ${platfdir:=.}/small_platform.xml --cfg=path:${srcdir:=.}/../msg --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/host-speed:1 -np 8 ${bindir:=.}/smpi_gemm 1000 sampling
+> You requested to use 8 ranks, but there is only 5 processes in your hostfile...
+> Matrix Size : 1000x1000
+> Sampling mode
+> Performance= 227.39 GFlop/s, Time= 8.795 sec, Size= 2000000000 Ops