/* Copyright (c) 2012-2014. The SimGrid Team.
 * All rights reserved. */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

 * 2.5D Block Matrix Multiplication example

#include "Matrix_init.h"
XBT_LOG_NEW_DEFAULT_CATEGORY(25D_MM,
                             "Messages specific for this SMPI example");
                    size_t m, size_t k, size_t n,
                    size_t Block_size, size_t group, size_t key,
                    size_t size_row, size_t size_col, size_t NB_groups ){
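  /* 2.5D scheme: the size_row x size_col x NB_groups processes form
   * NB_groups replicas of a size_row x size_col grid.  Each group runs the
   * Summa algorithm on its own slice of the K dimension and produces a
   * partial C; the partial results are then summed across the groups with
   * an MPI_Reduce.  'group' selects the replica this process belongs to and
   * 'key' orders the ranks inside the communicator splits below. */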
  /* Split the communicator into groups */

  /* Find out my identity in the default communicator */
  double time, communication_time = 0;
  double start_time, end_time; // time measurement
  double end_time_intern; // time measurement
  double start_time_reduce, end_time_reduce; // time measurement
  if ( group >= NB_groups ){
                "Not enough groups: NB_groups = %zu, my group id = %zu\n",
    /* out-of-range processes are split off on their own color... */
    MPI_Comm_split(MPI_COMM_WORLD, 0, key, &my_world);
    /* ...while the participating processes stay together on color 1 */
    MPI_Comm_split(MPI_COMM_WORLD, 1, key, &my_world);
  MPI_Comm_size(my_world, &NB_proc);

  if ( NB_proc < (int)(size_row*size_col*NB_groups) ){
                "Not enough processors: NB_proc = %d, required: %zu\n",
                NB_proc, size_row*size_col*NB_groups);
  MPI_Comm_split(my_world, group, key, &group_comm);

  MPI_Comm_rank(group_comm, &myrank);
  MPI_Comm_size(group_comm, &NB_proc);
  /* each group now runs its own share of the multiplication */
  NB_proc = size_row*size_col;
  size_t row = myrank / size_row;
  size_t col = myrank % size_row;
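  /* ranks are mapped row-major onto the grid: with NB_proc ==
   * size_row*size_col, row runs over [0, size_col) and col over
   * [0, size_row) */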
  /*-------------------------Check some mandatory conditions------------------*/
  size_t NB_Block = k / Block_size;
  if ( k % Block_size != 0 ){
85 "The matrix size has to be proportionnal to the number\
86 of blocks: %zu\n", NB_Block);
  if ( size_row > NB_Block || size_col > NB_Block ){
                "Too few blocks: the processor grid has %zu rows and %zu"
                " columns, but there are only %zu blocks per dimension\n",
                size_col, size_row, NB_Block);
  if ( NB_Block % size_row != 0 || NB_Block % size_col != 0){
    XBT_INFO("The number of blocks per processor is not an %s",
  if(row >= size_col || col >= size_row){
    XBT_INFO("I'm useless, bye! col: %zu row: %zu, "
             "size_col: %zu, size_row: %zu\n",
             col, row, size_col, size_row);
    /*----------------------Prepare the Communication Layer-------------------*/
    /* the useless processors still take part in the splits below (they are
     * collective over the parent communicators), so that the working
     * processors can build their communicators and carry on */

    /* Split comm into row and column comms */
    MPI_Comm row_comm, col_comm, group_line;
    /* color MPI_UNDEFINED opts these processes out: they receive
     * MPI_COMM_NULL and cannot end up in a working communicator */
    MPI_Comm_split(my_world, MPI_UNDEFINED, myrank, &group_line);
    MPI_Comm_split(group_comm, MPI_UNDEFINED, myrank, &row_comm);
    MPI_Comm_split(group_comm, MPI_UNDEFINED, myrank, &col_comm);
    /*------------------------Communication Layer can be used-----------------*/
  XBT_DEBUG("I'm initialized col: %zu row: %zu, "
            "size_col: %zu, size_row: %zu, my rank: %d\n",
            col, row, size_col, size_row, myrank);
  /*------------------------Initialize the matrices---------------------------*/

  /* think about a common interface
   * int pdgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
   *             m, n, k, alpha, a, ia, ja, lda, b, ib, jb, ldb,
   *             beta, c, ldc, Comm, rank );
  /*------------------------Prepare the Communication Layer-------------------*/
  /* Split comm into row and column comms */
  MPI_Comm row_comm, col_comm, group_line;
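  /* group_line links the processes sitting at the same (row, col) position
   * of every group: A and B are broadcast along it before Summa runs, and
   * the partial C matrices are summed along it afterwards */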
  MPI_Comm_split(my_world, myrank, group, &group_line);
  /* color by row, rank by column */
  MPI_Comm_split(group_comm, row, col, &row_comm);
  /* color by column, rank by row */
  MPI_Comm_split(group_comm, col, row, &col_comm);
  /*-------------------------Communication Layer can be used------------------*/

  size_t k_a = k / size_row;
  size_t k_b = k / size_col;
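  /* each process holds an m x k_a panel of A and a k_b x n panel of B: the
   * K dimension is cut across the size_row processes of a grid row for A
   * and across the size_col processes of a grid column for B */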
  /* only group 0 initialises its matrices; the other groups just allocate them */
    matrices_initialisation(&a, &b, &c, m, k_a, k_b, n, row, col);
    if( NB_groups > 1 ) res = malloc( sizeof(double)*m*n );
  } else matrices_allocation(&a, &b, &c, m, k_a, k_b, n);
  /*-------------------Configuration for the Summa algorithm------------------*/
  /*--------------------Allocation of the matrix blocks-----------------------*/
  double *a_Summa, *b_Summa;
  /* buffers for the panels broadcast at each step of Summa */
  blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n);
  /*--------------------Communication types for MPI---------------------------*/
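  /* three derived datatypes describe the panels exchanged during Summa:
   *  - Block_a:       an m x Block_size column panel taken out of the local
   *                   m x k_a matrix A (row stride k_a)
   *  - Block_a_local: the same panel stored contiguously (row stride
   *                   Block_size), as kept in the a_Summa buffer
   *  - Block_b:       a Block_size x n row panel of B (contiguous rows) */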
  MPI_Datatype Block_a;
  MPI_Datatype Block_a_local;
  MPI_Datatype Block_b;
  MPI_Type_vector(m, Block_size, k_a, MPI_DOUBLE, &Block_a);
  MPI_Type_vector(m, Block_size, Block_size, MPI_DOUBLE, &Block_a_local);
  MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b);
  MPI_Type_commit(&Block_a);
  MPI_Type_commit(&Block_a_local);
  MPI_Type_commit(&Block_b);
  /*-------------Communication types for MPI are configured-------------------*/
  /* synchronise so that every process starts the timer together */
  MPI_Barrier(my_world);
  start_time = MPI_Wtime();
  if( NB_groups > 1 ) {
    err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast A");
    err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast B");
    MPI_Barrier(my_world);
  end_time_intern = MPI_Wtime();
  communication_time += end_time_intern - start_time; /* broadcast phase */
  XBT_INFO("group %zu, NB_Block: %zu, NB_groups: %zu\n",
           group, NB_Block, NB_groups);
210 "m %zu, k_a %zu, k_b %zu, n %zu, Block_size %zu, "
211 "group*NB_Block/NB_groups %zu, "
212 "(group+1)*NB_Block/NB_groups %zu, row %zu, col %zu,"
213 "size_row %zu, size_col %zu\n" ,
214 m, k_a, k_b, n, Block_size,
215 group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
216 row, col, size_row, size_col);
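  /* each group multiplies only its own share of the panels, the blocks
   * [group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups), and
   * accumulates the result into its local, partial copy of C */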
  Summa( a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size,
         group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
         row, col, size_row, size_col, a_Summa, b_Summa, Block_a,
         Block_a_local, Block_b, row_comm, col_comm, 0);
  /*-------------------------End of the Summa algorithm-----------------------*/
  MPI_Comm_rank(group_line, &myrank);

  MPI_Barrier(my_world);
  start_time_reduce = MPI_Wtime();
  if( NB_groups > 1 ) {
    /* every group holds a full, partial copy of C, so the copies have to be
     * summed: this needs a reduction, a gather would not combine them */
    err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Reduce");
  MPI_Barrier(my_world);
  end_time_reduce = MPI_Wtime();

  MPI_Barrier(my_world);
  end_time = MPI_Wtime();
  time = end_time - start_time;
  double reduce_time = end_time_reduce - start_time_reduce;
  printf("communication time: %e seconds, reduce time: %e seconds, "
         "total time: %e seconds\n", communication_time, reduce_time, time);
  MPI_Barrier(my_world);

    check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col);
  // close the program properly
  MPI_Type_free(&Block_a);
  MPI_Type_free(&Block_a_local);
  MPI_Type_free(&Block_b);
  if( NB_groups > 1 ) {

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Comm_free(&my_world);
  MPI_Comm_free(&group_comm);
  MPI_Comm_free(&group_line);
  MPI_Comm_free(&row_comm);
  MPI_Comm_free(&col_comm);