2 * Classical Block Matrix Multiplication example
6 #include "Matrix_init.h"
/* Define MM_Summa as the default XBT log category for this file; the
 * XBT_DEBUG calls below take no explicit category and log through it. */
11 XBT_LOG_NEW_DEFAULT_CATEGORY(MM_Summa,
12 "Messages specific for this msg example");
14 double Summa(double *a, double *b, double *c,
15 size_t lda, size_t ldb, size_t ldc,
16 size_t m, size_t k_a, size_t k_b, size_t n,
17 size_t Block_size, size_t start, size_t end,
18 size_t row, size_t col, size_t size_row, size_t size_col,
19 double *a_local, double *b_local,
20 MPI_Datatype Block_a, MPI_Datatype Block_a_local,
22 MPI_Comm row_comm, MPI_Comm col_comm, int subs)
24 double *B_a , *B_b ; //matrix blocks
26 //double alpha = 1, beta = 1; //C := alpha * a * b + beta * c
27 size_t B_proc_row; // Number of bloc(row or col) on one processor
30 B_proc_col = k_b / Block_size; // Number of block on one processor
32 B_proc_row = k_a / Block_size; // Number of block on one processor
34 //size_t lda = k_a, ldb = n, ldc = n;
35 size_t lda_local = lda;
36 size_t ldb_local = ldb;
39 double time, computation_time = 0, communication_time = 0;
40 double start_time, end_time; //time mesure
41 double start_time_intern, end_time_intern; //time mesure
46 start_time = MPI_Wtime();
48 /*-------------Distributed Matrix Multiplication algorithm-----------------*/
50 for( iter = start; iter < end; iter++ ){
51 size_t pivot_row, pivot_col, pos_a, pos_b;
53 // pivot row on processor layer
54 pivot_row = (iter % size_col);
55 pivot_col = (iter % size_row);
56 //position of the block
58 pos_a = (size_t)((iter - start) / size_row) * Block_size;
59 pos_b = (size_t)((iter - start) / size_col) * ldb * Block_size;
61 pos_a = (size_t)(iter / size_row) * Block_size;
62 pos_b = (size_t)(iter / size_col) * ldb * Block_size;
65 // pivot row on processor layer
66 pivot_row = (size_t)(iter / B_proc_col) % size_col;
67 pivot_col = (size_t)(iter / B_proc_row) % size_row;
68 //position of the block
70 pos_a = ((iter - start) % B_proc_row) * Block_size;
71 pos_b = ((iter - start) % B_proc_col) * ldb * Block_size;
73 pos_a = (iter % B_proc_row) * Block_size;
74 pos_b = (iter % B_proc_col) * ldb * Block_size;
77 XBT_DEBUG( "pivot: %zu, iter: %zu, B_proc_col: %zu, "
78 "size_col:%zu, size_row: %zu\n",
79 pivot_row, iter, B_proc_row,size_col,size_row);
80 /* MPI_Barrier(row_comm);*/
81 /* MPI_Barrier(col_comm);*/
83 start_time_intern = MPI_Wtime();
87 if( pivot_col != col ){
89 lda_local = Block_size;
90 XBT_DEBUG("recieve B_a %zu,%zu \n",m , Block_size);
91 Block = &Block_a_local;
95 XBT_DEBUG("sent B_a %zu,%zu \n",m , Block_size);
98 err = MPI_Bcast(B_a, 1, *Block, pivot_col, row_comm);
99 if (err != MPI_SUCCESS) {
100 perror("Error Bcast A\n");
105 XBT_DEBUG("position of B_a: %zu \n", pos_a);
110 if( pivot_row == row ){
112 XBT_DEBUG("sent B_b Block_size: %zu, pos:%zu \n",
116 XBT_DEBUG("recieve B_b %zu,%zu \n", Block_size,n);
118 err = MPI_Bcast(B_b, 1, Block_b, pivot_row, col_comm );
119 if (err != MPI_SUCCESS) {
120 perror("Error Bcast B\n");
126 XBT_DEBUG("position of B_b: %zu \n", pos_b);
128 end_time_intern = MPI_Wtime();
129 communication_time += end_time_intern - start_time_intern;
131 /* MPI_Barrier(row_comm);*/
132 /* MPI_Barrier(col_comm);*/
133 start_time_intern = MPI_Wtime();
134 XBT_DEBUG("execute Gemm number: %zu\n", iter);
135 //We have received a line of blocks and a column
136 // cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
137 // m, n, Block_size, alpha, B_a, lda_local, B_b, ldb_local,
140 for(i = 0; i < m; i++)
141 for(j = 0; j < n; j++)
142 for(k = 0; k < Block_size; k++)
143 c[i*ldc+j] += B_a[i*lda_local+k]*B_b[k*ldb_local+j];
145 end_time_intern = MPI_Wtime();
146 computation_time += end_time_intern - start_time_intern;
149 MPI_Barrier(row_comm);
150 MPI_Barrier(col_comm);
152 end_time = MPI_Wtime();
153 time = end_time - start_time ;
154 printf("communication time: %e seconds, "
155 "computation time: %e seconds\n",
156 communication_time, computation_time);