/* 2.5D Block Matrix Multiplication example */

/* Headers needed by the code below; the original include list is partly
 * elided, so everything beyond Matrix_init.h is reconstructed from use. */
#include "Matrix_init.h"
#include "Summa.h"
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include "xbt/log.h"

XBT_LOG_NEW_DEFAULT_CATEGORY(25D_MM,
                             "Messages specific to this example");
/* The function header is elided in this excerpt; the name two_dot_five and
 * the double return type are assumptions consistent with SimGrid's MM
 * example. */
double two_dot_five( size_t m, size_t k, size_t n,
                     size_t Block_size, size_t group, size_t key,
                     size_t size_row, size_t size_col, size_t NB_groups ){
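  /* Overview: the processors form NB_groups independent size_row x size_col
   * grids.  Each grid runs SUMMA on its own slice of the k dimension of A
   * and B, producing a partial C; a final reduction over the groups sums the
   * partial results (the classic 2.5D trade-off: replicate memory to reduce
   * communication). */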
  /* Split the communicator into groups */

  /* Find out my identity in the default communicator */
  /* The declarations below are reconstructed from their use in this excerpt. */
  int myrank;
  int NB_proc;
  int err;
  MPI_Comm my_world, group_comm;
  double *a, *b, *c;       /* local blocks of A, B and the partial result C */
  double *res = NULL;      /* reduced result, allocated on group 0 only */

  double time, communication_time = 0;
  double start_time, end_time;               // time measure
  double end_time_intern;                    // time measure
  double start_time_reduce, end_time_reduce; // time measure
  if ( group >= NB_groups ){
    XBT_INFO("Not enough groups. NB_groups: %zu, my group id: %zu\n",
             NB_groups, group);
    /* excess processors get their own color and stop here */
    MPI_Comm_split(MPI_COMM_WORLD, 0, key, &my_world);
    return -1;
  }
  MPI_Comm_split(MPI_COMM_WORLD, 1, key, &my_world);

  MPI_Comm_size (my_world, &NB_proc);
  if ( NB_proc < (int)(size_row*size_col*NB_groups) ){
    XBT_INFO("Not enough processors. NB_proc: %d, required: %zu\n",
             NB_proc, size_row*size_col*NB_groups);
    return -1;
  }
  MPI_Comm_split(my_world, group, key, &group_comm);
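  /* group_comm now contains only the size_row*size_col processors of this
   * replication layer; everything below (grid coordinates, row/col
   * communicators, SUMMA) happens per group. */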
  MPI_Comm_rank(group_comm, &myrank);
  MPI_Comm_size (group_comm, &NB_proc);
  /* each group now runs its own instance of the algorithm */
  NB_proc = size_row * size_col;
  size_t row = myrank / size_row;
  size_t col = myrank % size_row;
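  /* The grid is laid out row-major over the group ranks.  For example, with
   * size_row = 2: rank 3 -> row 1, col 1; rank 4 -> row 2, col 0. */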
/*-------------------------Check some mandatory conditions------------------*/
  size_t NB_Block = k / Block_size;
  if ( k % Block_size != 0 ){
    XBT_INFO("The matrix size has to be proportional to the number "
             "of blocks: %zu\n", NB_Block);
    return -1;
  }
  if ( size_row > NB_Block || size_col > NB_Block ){
    XBT_INFO("Number of blocks is too small compared to the number of"
             " processors (%zu,%zu) in a row or a col (%zu)\n",
             size_col, size_row, NB_Block);
    return -1;
  }
  if ( NB_Block % size_row != 0 || NB_Block % size_col != 0){
    XBT_INFO("The number of blocks per processor is not an integer\n");
    return -1;
  }
  if(row >= size_col || col >= size_row){
    XBT_INFO( "I'm useless bye!!! col: %zu row: %zu, "
              "size_col: %zu , size_row: %zu \n",
              col, row, size_col, size_row);
/*----------------------Prepare the Communication Layer-------------------*/
    /* put the useless processors on a color of their own so that the other
     * processors can run the matrix multiplication without them */
    /* Split comm into row and column comms */
    MPI_Comm row_comm, col_comm, group_line;
    /* The original code passed MPI_UNDEFINED as the key; it is the color
     * that must be MPI_UNDEFINED so that these processors get MPI_COMM_NULL
     * while still taking part in the collective split. */
    MPI_Comm_split(my_world, MPI_UNDEFINED, myrank, &group_line);
    /* color by row, rank by column */
    MPI_Comm_split(group_comm, MPI_UNDEFINED, 0, &row_comm);
    /* color by column, rank by row */
    MPI_Comm_split(group_comm, MPI_UNDEFINED, 0, &col_comm);
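    /* MPI_Comm_split is collective over the parent communicator, so even the
     * processors that sit outside the compute grid must make the three calls
     * above; otherwise the matching splits in the active branch would hang. */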
/*------------------------Communication Layer can be used-----------------*/
    /* closing of this branch reconstructed: the spare processors are done */
    return 0;
  }
  XBT_DEBUG( "I'm initialized col: %zu row: %zu, "
             "size_col: %zu , size_row: %zu, my rank: %d \n",
             col, row, size_col, size_row, myrank);
/*------------------------Initialize the matrices---------------------------*/

  /* think about a common interface
   *  int pdgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
   *              m, n, k, alpha, a, ia, ja, lda, b, ib, jb, ldb,
   *              beta, c, ldc, Comm, rank );
   */
/*------------------------Prepare the Communication Layer-------------------*/
  /* Split comm into row and column comms */
  MPI_Comm row_comm, col_comm, group_line;
  MPI_Comm_split(my_world, myrank, group, &group_line);
  /* color by row, rank by column */
  MPI_Comm_split(group_comm, row, col, &row_comm);
  /* color by column, rank by row */
  MPI_Comm_split(group_comm, col, row, &col_comm);
/*-------------------------Communication Layer can be used------------------*/
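  /* Resulting topology: row_comm links the size_row processors that share a
   * row of the grid (used to circulate panels of A), col_comm links the
   * size_col processors of a column (panels of B), and group_line links the
   * NB_groups replicas holding the same (row, col) position across layers. */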
  size_t k_a = k / size_row;
  size_t k_b = k / size_col;
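  /* Local panel widths: each processor stores an m x k_a block of A and a
   * k_b x n block of B, the k dimension being cut by the number of
   * processors in a row (for A) and in a column (for B). */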
  /* only group 0 fills the matrices with real data; the others just
   * allocate (the opening test is reconstructed from the matching else) */
  if ( group == 0 ){
    matrices_initialisation(&a, &b, &c, m, k_a, k_b, n, row, col);
    if( NB_groups > 1 ) res = malloc( sizeof(double)*m*n );
  } else matrices_allocation(&a, &b, &c, m, k_a, k_b, n);
/*-------------------Configuration for Summa algorithm----------------------*/
/*--------------------Allocation of matrices block---------------------------*/
  double *a_Summa, *b_Summa;
  blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n);
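  /* a_Summa and b_Summa are staging buffers: each SUMMA step broadcasts one
   * m x Block_size panel of A along a row and one Block_size x n panel of B
   * along a column into these buffers before the local multiply. */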
/*--------------------Communication types for MPI---------------------------*/
  MPI_Datatype Block_a;
  MPI_Datatype Block_a_local;
  MPI_Datatype Block_b;
  MPI_Type_vector(m, Block_size, k_a, MPI_DOUBLE, &Block_a);
  MPI_Type_vector(m, Block_size, Block_size, MPI_DOUBLE, &Block_a_local);
  MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b);
  MPI_Type_commit(&Block_a);
  MPI_Type_commit(&Block_a_local);
  MPI_Type_commit(&Block_b);
/*-------------Communication types for MPI are configured------------------*/
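  /* Layout of the derived datatypes (row-major storage assumed):
   *  - Block_a:       an m x Block_size column panel inside the m x k_a
   *                   array a, hence the stride of k_a doubles per row;
   *  - Block_a_local: the same panel stored contiguously in a_Summa
   *                   (stride == blocklength == Block_size);
   *  - Block_b:       a Block_size x n row panel of b, contiguous since the
   *                   stride equals the full row length n. */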
  MPI_Barrier(my_world);
  start_time = MPI_Wtime();
  if( NB_groups > 1 ) {
    err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast A\n");
      return -1;
    }
    err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast B\n");
      return -1;
    }
    MPI_Barrier(my_world);
  }
  end_time_intern = MPI_Wtime();
  communication_time += end_time_intern - start_time;
  XBT_INFO( "group %zu NB_block: %zu, NB_groups %zu\n",
            group, NB_Block, NB_groups);
  XBT_DEBUG("m %zu, k_a %zu, k_b %zu, n %zu, Block_size %zu, "
            "group*NB_Block/NB_groups %zu, "
            "(group+1)*NB_Block/NB_groups %zu, row %zu, col %zu, "
            "size_row %zu, size_col %zu\n",
            m, k_a, k_b, n, Block_size,
            group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
            row, col, size_row, size_col);
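  /* Each group multiplies only its share of the k dimension: blocks
   * [group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups).  Summing the
   * per-group partial products of these slices yields the full C. */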
  Summa( a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size,
         group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
         row, col, size_row, size_col, a_Summa, b_Summa, Block_a,
         Block_a_local, Block_b, row_comm, col_comm, 0);
/*-------------------------End Summa algorithm------------------------------*/
  MPI_Comm_rank(group_line, &myrank);

  MPI_Barrier(my_world);
  start_time_reduce = MPI_Wtime();
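  /* Sum the partial C matrices of the NB_groups layers onto group 0 through
   * group_line, which connects the replicas of each grid position. */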
  if( NB_groups > 1 ) {
    // would a gather be better?
    err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Reduce C\n");  /* the original said "Bcast A", a copy-paste slip */
      return -1;
    }
  }
  MPI_Barrier(my_world);
  end_time_reduce = MPI_Wtime();
  MPI_Barrier(my_world);
  end_time = MPI_Wtime();
  time = end_time - start_time;
  double reduce_time = end_time_reduce - start_time_reduce;
  printf("communication time: %le seconds, reduce time: %le seconds, "
         "total time: %le seconds\n", communication_time, reduce_time, time);
  MPI_Barrier(my_world);
  check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col);
  // properly close the program
  MPI_Type_free(&Block_a);
  MPI_Type_free(&Block_a_local);
  MPI_Type_free(&Block_b);
  if( NB_groups > 1 ) {
    /* body elided in the excerpt; presumably the reduction buffer allocated
     * above is released here */
    free(res);
  }
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Comm_free(&my_world);
  MPI_Comm_free(&group_comm);
  MPI_Comm_free(&group_line);
  MPI_Comm_free(&row_comm);
  MPI_Comm_free(&col_comm);