 * 2.5D Block Matrix Multiplication example
#include "Matrix_init.h"
XBT_LOG_NEW_DEFAULT_CATEGORY(MM_25D,
                             "Messages specific for this msg example");
                 size_t m, size_t k, size_t n,
                 size_t Block_size, size_t group, size_t key,
                 size_t size_row, size_t size_col, size_t NB_groups ){
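  /* Parameters, as used below: m, k, n are the global matrix dimensions
   * (C = A*B with A m-by-k and B k-by-n); Block_size is the width of the
   * blocks circulated by SUMMA; group and key identify this process's
   * replica group and its ordering inside it; size_row and size_col give
   * the 2D processor grid of each group; NB_groups is the replication
   * factor along the third (the ".5") dimension. */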
  /* Split the communicator into groups */
  /* Find out my identity in the default communicator */
  double time, communication_time = 0;
  double start_time, end_time;               // time measurement
  double end_time_intern;                    // time measurement
  double start_time_reduce, end_time_reduce; // time measurement
  if ( group >= NB_groups ){
         "Not enough groups: NB_groups %zu, my group id %zu\n",
    MPI_Comm_split(MPI_COMM_WORLD, 0, key, &my_world);
    MPI_Comm_split(MPI_COMM_WORLD, 1, key, &my_world);
  MPI_Comm_size(my_world, &NB_proc);
  if ( NB_proc < (int)(size_row*size_col*NB_groups) ){
         "Not enough processors: NB_proc %d, required %zu\n",
         NB_proc, size_row*size_col*NB_groups);
  MPI_Comm_split(my_world, group, key, &group_comm);
  MPI_Comm_rank(group_comm, &myrank);
  MPI_Comm_size(group_comm, &NB_proc);
  /* each group starts the execution of its own multiplication */
  NB_proc = size_row*size_col;
  size_t row = myrank / size_row;
  size_t col = myrank % size_row;
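  /* rank-to-grid mapping: with size_row = 2, for instance, rank 3 lands on
   * row 1, col 1 of the grid */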
/*-------------------------Check some mandatory conditions------------------*/
  size_t NB_Block = k / Block_size;
  if ( k % Block_size != 0 ){
         "The matrix size has to be a multiple of the block size; "
         "number of blocks: %zu\n", NB_Block);
  if ( size_row > NB_Block || size_col > NB_Block ){
         "The number of blocks is too small compared to the number of"
         " processors (%zu,%zu) in a row or a column (%zu)\n",
         size_col, size_row, NB_Block);
  if ( NB_Block % size_row != 0 || NB_Block % size_col != 0){
    XBT_INFO("The number of blocks per processor is not an %s",
  if(row >= size_col || col >= size_row){
    XBT_INFO("I'm useless, bye!!! col: %zu row: %zu, "
             "size_col: %zu, size_row: %zu\n",
             col, row, size_col, size_row);
/*----------------------Prepare the Communication Layer---------------------*/
  /* the surplus processors get their own color so that the other
   * processors can carry out the matrix multiplication */
  /* Split comm into row and column comms */
  MPI_Comm row_comm, col_comm, group_line;
  MPI_Comm_split(my_world, myrank, MPI_UNDEFINED, &group_line);
  /* color by row, rank by column */
  MPI_Comm_split(group_comm, size_row, MPI_UNDEFINED, &row_comm);
  /* color by column, rank by row */
  MPI_Comm_split(group_comm, size_col, MPI_UNDEFINED, &col_comm);
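  /* MPI_Comm_split is collective, so even the processors that take no part
   * in the computation must join every split; the communicators they obtain
   * here are never used for the multiplication itself */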
/*------------------------Communication Layer can be used-------------------*/
    XBT_DEBUG("I'm initialized col: %zu row: %zu, "
              "size_col: %zu, size_row: %zu, my rank: %d\n",
              col, row, size_col, size_row, myrank);
/*------------------------Initialize the matrices---------------------------*/
  /* think about a common interface
   *  int pdgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
   *              m, n, k, alpha, a, ia, ja, lda, b, ib, jb, ldb,
   *              beta, c, ldc, Comm, rank );
/*------------------------Prepare the Communication Layer-------------------*/
  /* Split comm into row and column comms */
  MPI_Comm row_comm, col_comm, group_line;
  MPI_Comm_split(my_world, myrank, group, &group_line);
  /* color by row, rank by column */
  MPI_Comm_split(group_comm, row, col, &row_comm);
  /* color by column, rank by row */
  MPI_Comm_split(group_comm, col, row, &col_comm);
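  /* color = row gathers one grid row into row_comm, ordered by column, and
   * col_comm is built symmetrically; group_line connects the processors that
   * sit at the same (row, col) position across the NB_groups replicas */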
/*-------------------------Communication Layer can be used------------------*/
  size_t k_a = k / size_row;
  size_t k_b = k / size_col;
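  /* each processor holds an m x k_a panel of A and a k_b x n panel of B:
   * the k dimension is spread across the rows and columns of the grid */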
  /* only on group 0 */
    matrices_initialisation(&a, &b, &c, m, k_a, k_b, n, row, col);
    if( NB_groups > 1 ) res = malloc( sizeof(double)*m*n );
  } else matrices_allocation(&a, &b, &c, m, k_a, k_b, n);
/*-------------------Configuration for the SUMMA algorithm------------------*/
/*--------------------Allocation of the matrix blocks-----------------------*/
  double *a_Summa, *b_Summa;
  blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n);
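  /* a_Summa (m x Block_size) and b_Summa (Block_size x n) are the receive
   * buffers for the blocks circulated at each step of SUMMA */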
/*--------------------Communication types for MPI---------------------------*/
  MPI_Datatype Block_a;
  MPI_Datatype Block_a_local;
  MPI_Datatype Block_b;
  MPI_Type_vector(m, Block_size, k_a, MPI_DOUBLE, &Block_a);
  MPI_Type_vector(m, Block_size, Block_size, MPI_DOUBLE, &Block_a_local);
  MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b);
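  /* Block_a:       a Block_size-wide column block of the local A panel,
   *                i.e. m rows of Block_size doubles with stride k_a;
   * Block_a_local: the same block stored contiguously (stride Block_size);
   * Block_b:       Block_size consecutive full rows of the local B panel */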
  MPI_Type_commit(&Block_a);
  MPI_Type_commit(&Block_a_local);
  MPI_Type_commit(&Block_b);
/*-------------Communication types for MPI are configured-------------------*/
  MPI_Barrier(my_world);
  start_time = MPI_Wtime();
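  /* 2.5D replication step: group 0 initialized A and B, and now broadcasts
   * its panels along group_line so that every replica group starts from the
   * same input matrices */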
  if( NB_groups > 1 ) {
    err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast A\n");
    err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast B\n");
    MPI_Barrier(my_world);
    end_time_intern = MPI_Wtime();
    communication_time += end_time_intern - start_time;
  XBT_INFO("group %zu NB_Block: %zu, NB_groups %zu\n",
           group, NB_Block, NB_groups);
204 "m %zu, k_a %zu, k_b %zu, n %zu, Block_size %zu, "
205 "group*NB_Block/NB_groups %zu, "
206 "(group+1)*NB_Block/NB_groups %zu, row %zu, col %zu,"
207 "size_row %zu, size_col %zu\n" ,
208 m, k_a, k_b, n, Block_size,
209 group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
210 row, col, size_row, size_col);
  Summa( a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size,
         group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
         row, col, size_row, size_col, a_Summa, b_Summa, Block_a,
         Block_a_local, Block_b, row_comm, col_comm, 0);
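  /* each replica group has just run SUMMA on its own slice of the k
   * dimension, blocks group*NB_Block/NB_groups up to
   * (group+1)*NB_Block/NB_groups, and holds a partial C */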
/*-------------------------End of the SUMMA algorithm-----------------------*/
  MPI_Comm_rank(group_line, &myrank);
  MPI_Barrier(my_world);
  start_time_reduce = MPI_Wtime();
  if( NB_groups > 1 ) {
    // would a gather be better?
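    /* 2.5D combine step: the partial products of the NB_groups replicas are
     * summed into res on the root of group_line */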
    err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Reduce C\n");
  MPI_Barrier(my_world);
  end_time_reduce = MPI_Wtime();
  MPI_Barrier(my_world);
  end_time = MPI_Wtime();
  time = end_time - start_time;
  double reduce_time = end_time_reduce - start_time_reduce;
  printf("communication time: %le seconds, reduce time: %le seconds, "
         "total time: %le seconds\n", communication_time, reduce_time, time);
  MPI_Barrier(my_world);
    check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col);
  // close the program properly
  MPI_Type_free(&Block_a);
  MPI_Type_free(&Block_a_local);
  MPI_Type_free(&Block_b);
  if( NB_groups > 1 ) {
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Comm_free(&my_world);
  MPI_Comm_free(&group_comm);
  MPI_Comm_free(&group_line);
  MPI_Comm_free(&row_comm);
  MPI_Comm_free(&col_comm);