Inline is overkill for this function.

[simgrid.git] / examples / smpi / MM / Summa.c
diff --git a/examples/smpi/MM/Summa.c b/examples/smpi/MM/Summa.c

index 617bfaa..b2fb9aa 100644 (file)
--- a/examples/smpi/MM/Summa.c
+++ b/examples/smpi/MM/Summa.c
@@ -1,31 +1,34 @@
  /*!
   * Classical Block Matrix Multiplication example
   *
- * Authors: Quintin Jean-Noël
   */
+
  #include "Matrix_init.h"
  #include "Summa.h"
-#include "timer.h"
  #include "xbt/log.h"
+#include <stdio.h>
+
   XBT_LOG_NEW_DEFAULT_CATEGORY(MM_Summa,
                               "Messages specific for this msg example");
  
-inline double Summa(
-                     double *a, double *b, double *c,
-                     size_t lda, size_t ldb, size_t ldc,
-                     size_t m, size_t k_a, size_t k_b, size_t n,
-                     size_t Block_size, size_t start, size_t end,
-                     size_t row, size_t col, size_t size_row, size_t size_col,
-                     double *a_local, double *b_local,
-                     MPI_Datatype Block_a, MPI_Datatype Block_a_local,
-                     MPI_Datatype Block_b,
-                     MPI_Comm row_comm, MPI_Comm col_comm, int subs)
+double Summa(double *a, double *b, double *c,
+             size_t lda, size_t ldb, size_t ldc,
+             size_t m, size_t k_a, size_t k_b, size_t n,
+             size_t Block_size, size_t start, size_t end,
+             size_t row, size_t col, size_t size_row, size_t size_col,
+             double *a_local, double *b_local,
+             MPI_Datatype Block_a, MPI_Datatype Block_a_local,
+             MPI_Datatype Block_b,
+             MPI_Comm row_comm, MPI_Comm col_comm, int subs)
  {
    double *B_a     , *B_b     ; //matrix blocks
    size_t err;
-  double alpha = 1, beta = 1;  //C := alpha * a * b + beta * c
-  size_t B_proc_col, B_proc_row; // Number of bloc(row or col) on one processor
+  //double alpha = 1, beta = 1;  //C := alpha * a * b + beta * c
+  size_t B_proc_row; // Number of bloc(row or col) on one processor
+#ifndef CYCLIC
+  size_t B_proc_col;
    B_proc_col =  k_b / Block_size;  // Number of block on one processor
+#endif
    B_proc_row = k_a / Block_size; // Number of block on one processor
  
    //size_t lda = k_a, ldb = n, ldc = n;
@@ -34,13 +37,13 @@ inline double Summa(
  
  
    double time, computation_time = 0, communication_time = 0;
-  struct timespec start_time, end_time; //time mesure
-  struct timespec start_time_intern, end_time_intern; //time mesure
+  double start_time, end_time; //time mesure
+  double start_time_intern, end_time_intern; //time mesure
  
  
  
  
-  get_time(&start_time);
+  start_time = MPI_Wtime();
  
    /*-------------Distributed Matrix Multiplication algorithm-----------------*/
    size_t iter;
@@ -74,10 +77,10 @@ inline double Summa(
      XBT_DEBUG( "pivot: %zu, iter: %zu, B_proc_col: %zu, "
                  "size_col:%zu, size_row: %zu\n",
                  pivot_row, iter, B_proc_row,size_col,size_row);
-    MPI_Barrier(row_comm);
-    MPI_Barrier(col_comm);
+/*    MPI_Barrier(row_comm);*/
+/*    MPI_Barrier(col_comm);*/
  
-    get_time(&start_time_intern);
+    start_time_intern = MPI_Wtime();
      //Broadcast the row
      if(size_row > 1){
        MPI_Datatype * Block;
@@ -122,12 +125,12 @@ inline double Summa(
        B_b = b + pos_b;
        XBT_DEBUG("position of B_b: %zu \n", pos_b);
      }
-    get_time(&end_time_intern);
-    communication_time += get_timediff(&start_time_intern,&end_time_intern);
+    end_time_intern = MPI_Wtime();
+    communication_time += end_time_intern - start_time_intern;
  
-    MPI_Barrier(row_comm);
-    MPI_Barrier(col_comm);
-    get_time(&start_time_intern);
+/*    MPI_Barrier(row_comm);*/
+/*    MPI_Barrier(col_comm);*/
+    start_time_intern = MPI_Wtime();
      XBT_DEBUG("execute Gemm number: %zu\n", iter);
      //We have recieved a line of block and a colomn
     //              cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
@@ -137,19 +140,19 @@ inline double Summa(
      for(i = 0; i < m; i++)
        for(j = 0; j < n; j++)
          for(k = 0; k < Block_size; k++)
-          c[i*ldc+j] += B_a[j*lda_local+k]*B_b[k*ldb_local+j];
+          c[i*ldc+j] += B_a[i*lda_local+k]*B_b[k*ldb_local+j];
  
-    get_time(&end_time_intern);
-    computation_time += get_timediff(&start_time_intern,&end_time_intern);
+    end_time_intern = MPI_Wtime();
+    computation_time += end_time_intern - start_time_intern;
  
    }
    MPI_Barrier(row_comm);
    MPI_Barrier(col_comm);
  
-  get_time(&end_time);
-  time = get_timediff(&start_time,&end_time);
-  printf("communication time: %le nanoseconds, "
-         "computation time: %le nanoseconds\n",
+  end_time = MPI_Wtime();
+  time = end_time - start_time ;
+  printf("communication time: %e seconds, "
+         "computation time: %e seconds\n",
           communication_time, computation_time);