/* Copyright (c) 2012, 2014. The SimGrid Team.
 * All rights reserved. */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

/* Block Matrix Multiplication example */

#include "Matrix_init.h"
#include "Summa.h"
#include <mpi.h>
#include <stdlib.h>
#include <unistd.h>
#include "xbt/log.h"
#include "xbt/str.h"

XBT_LOG_NEW_DEFAULT_CATEGORY(MM_mpi, "Messages specific for this msg example");
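/* This example implements a "2.5D" matrix multiplication: the processors are
 * split into NB_groups groups, each group runs the SUMMA algorithm on its own
 * slice of the K dimension, and the partial C matrices are then summed across
 * groups with a reduction. It is typically launched through SimGrid's smpirun
 * wrapper; the exact flags below are an illustration, not a requirement:
 *   smpirun -np 8 -platform platform.xml -hostfile hostfile ./2.5D_MM -M 1024
 */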
static double two_dot_five(size_t m, size_t k, size_t n, size_t Block_size, size_t group, size_t key, size_t size_row,
                           size_t size_col, size_t NB_groups ){
  double *a, *b, *c;
  double *res = NULL;
  /* Split the communicator into groups */

  /* Find out my identity in the default communicator */
  int myrank;
  int NB_proc;
  int err;

  double time, communication_time = 0;
  double start_time, end_time;                //time measure
  double end_time_intern;                     //time measure
  double start_time_reduce, end_time_reduce;  //time measure

  MPI_Comm my_world;
  if ( group >= NB_groups ){
    XBT_DEBUG("Not enough groups; NB_groups : %zu my group id : %zu\n", NB_groups, group);
    MPI_Comm_split(MPI_COMM_WORLD, 0, key, &my_world);
    return -1;
  } else {
    MPI_Comm_split(MPI_COMM_WORLD, 1, key, &my_world);
  }
  MPI_Comm_size (my_world, &NB_proc);

  if ( NB_proc < (int)(size_row*size_col*NB_groups) ){
    XBT_INFO("Not enough processors NB_proc : %d required : %zu\n", NB_proc, size_row*size_col*NB_groups);
    return -1;
  }
  MPI_Comm group_comm;
  MPI_Comm_split(my_world, group, key, &group_comm);

  MPI_Comm_rank(group_comm, &myrank);
  MPI_Comm_size (group_comm, &NB_proc);
  /* from here on, each group runs its own instance of the algorithm */

  NB_proc = size_row*size_col;
  size_t row = myrank / size_row;
  size_t col = myrank % size_row;
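  /* Ranks are mapped row-major onto a size_col x size_row process grid:
   * size_row is the length of a grid row (number of columns) and size_col the
   * number of rows. E.g. with size_row = 4, rank 5 sits at row 1, col 1. */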
  /*-------------------------Check some mandatory conditions------------------*/
  size_t NB_Block = k / Block_size;
  if ( k % Block_size != 0 ){
    XBT_INFO("The K dimension has to be a multiple of the block size; number of blocks: %zu\n", NB_Block);
    return -1;
  }

  if ( size_row > NB_Block || size_col > NB_Block ){
    XBT_INFO("Number of blocks is too small compared to the number of processors (%zu,%zu) in a row or a col (%zu)\n",
             size_col, size_row, NB_Block);
    return -1;
  }

  if ( NB_Block % size_row != 0 || NB_Block % size_col != 0){
    XBT_INFO("The number of blocks per processor is not an integer\n");
    return -1;
  }

  if(row >= size_col || col >= size_row){
    XBT_INFO("I'm useless, bye! col: %zu row: %zu, size_col: %zu, size_row: %zu\n", col, row, size_col, size_row);
    /*----------------------Prepare the Communication Layer-------------------*/
    /* put the useless processors on colors of their own so that the matrix
     * multiplication can proceed on the other processors */

    /* Split the communicator into row and column communicators */
    MPI_Comm row_comm, col_comm, group_line;
    MPI_Comm_split(my_world, myrank, MPI_UNDEFINED, &group_line);
    /* color by row, rank by column */
    MPI_Comm_split(group_comm, size_row, MPI_UNDEFINED, &row_comm);
    /* color by column, rank by row */
    MPI_Comm_split(group_comm, size_col, MPI_UNDEFINED, &col_comm);
    /*------------------------Communication Layer can be used-----------------*/

    return 0;
  }
108 XBT_DEBUG("I'm initialized col: %zu row: %zu, size_col: %zu , size_row: %zu, my rank: %d \n", col,row,size_col,
  /*------------------------Initialize the matrices---------------------------*/
  /* think about a common interface
   *  int pdgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, a, ia, ja, lda, b, ib, jb, ldb,
   *              beta, c, ldc, Comm, rank );
   */
  /*------------------------Prepare the Communication Layer-------------------*/
  /* Split the communicator into row and column communicators */
  MPI_Comm row_comm, col_comm, group_line;
  MPI_Comm_split(my_world, myrank, group, &group_line);
  /* color by row, rank by column */
  MPI_Comm_split(group_comm, row, col, &row_comm);
  /* color by column, rank by row */
  MPI_Comm_split(group_comm, col, row, &col_comm);
  /*-------------------------Communication Layer can be used------------------*/
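  /* Three communicators come out of these splits:
   *  - group_line links the ranks holding the same grid position across all
   *    groups (used below to broadcast A and B and to reduce C),
   *  - row_comm links the processors of one grid row inside a group,
   *  - col_comm links the processors of one grid column inside a group. */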
  /* per-processor tile sizes */
  m = m / size_col;
  n = n / size_row;
  size_t k_a = k / size_row;  /* K extent of the local A panel */
  size_t k_b = k / size_col;  /* K extent of the local B panel */
  /* only group 0 initializes real data; the other groups just allocate buffers */
  if( group == 0 ) {
    matrices_initialisation(&a, &b, &c, m, k_a, k_b, n, row, col);
    if( NB_groups > 1 ) res = malloc( sizeof(double)*m*n );
  } else matrices_allocation(&a, &b, &c, m, k_a, k_b, n);
  /*-------------------Configuration for the SUMMA algorithm------------------*/
  /*--------------------Allocation of the matrix blocks-----------------------*/
  double *a_Summa, *b_Summa;
  blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n);
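  /* a_Summa and b_Summa are staging buffers that receive, at each SUMMA step,
   * the current m x Block_size panel of A and Block_size x n panel of B */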
  /*--------------------Communication types for MPI---------------------------*/
  MPI_Datatype Block_a;
  MPI_Datatype Block_a_local;
  MPI_Datatype Block_b;
  MPI_Type_vector(m, Block_size, k_a, MPI_DOUBLE, &Block_a);
  MPI_Type_vector(m, Block_size, Block_size, MPI_DOUBLE, &Block_a_local);
  MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b);
  MPI_Type_commit(&Block_a);
  MPI_Type_commit(&Block_a_local);
  MPI_Type_commit(&Block_b);
  /*-------------Communication types for MPI are configured-------------------*/
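  /* Block_a describes an m x Block_size column panel inside the row-major
   * m x k_a local A (m runs of Block_size doubles with stride k_a),
   * Block_a_local is the same panel stored contiguously, and Block_b is a
   * Block_size x n horizontal panel of B, already contiguous (stride n). */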
  MPI_Barrier(my_world);
  start_time = MPI_Wtime();
  if( NB_groups > 1 ) {
    /* replicate A and B from group 0 to the other groups */
    err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast A\n");
      return -1;
    }
    err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Bcast B\n");
      return -1;
    }
    MPI_Barrier(my_world);
    end_time_intern = MPI_Wtime();
    communication_time += end_time_intern - start_time;
  }
174 XBT_INFO( "group %zu NB_block: %zu, NB_groups %zu\n",group,NB_Block, NB_groups);
175 XBT_INFO("m %zu, k_a %zu, k_b %zu, n %zu, Block_size %zu, group*NB_Block/NB_groups %zu, "
176 "(group+1)*NB_Block/NB_groups %zu, row %zu, col %zu, size_row %zu, size_col %zu\n",m, k_a, k_b, n,
177 Block_size, group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,row, col, size_row, size_col);
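  /* Each group runs SUMMA on its own slice of the K dimension, i.e. on the
   * blocks [group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups), and thus
   * produces a partial C that still has to be summed across the groups. */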
  Summa(a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size, group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
        row, col, size_row, size_col, a_Summa, b_Summa, Block_a, Block_a_local, Block_b, row_comm, col_comm, 0);

  /*-------------------------End SUMMA algorithm------------------------------*/
  MPI_Comm_rank(group_line, &myrank);

  MPI_Barrier(my_world);
  start_time_reduce = MPI_Wtime();
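  /* Sum the per-group partial C matrices onto group 0; this reduction over
   * group_line is the extra communication step that the 2.5D scheme trades
   * for the replicated memory. */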
  if( NB_groups > 1 ) {
    // a gather is better?
    err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line);
    if (err != MPI_SUCCESS) {
      perror("Error Reduce C\n");
      return -1;
    }
  }

  MPI_Barrier(my_world);
  end_time_reduce = MPI_Wtime();

  MPI_Barrier(my_world);
  end_time = MPI_Wtime();
  time = end_time - start_time;
  double reduce_time = end_time_reduce - start_time_reduce;
  printf("communication time: %e seconds, reduce time: %e seconds, total time: %e seconds\n", communication_time,
         reduce_time, time);
  MPI_Barrier(my_world);

  check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col);
  // close the program properly
  MPI_Type_free(&Block_a);
  MPI_Type_free(&Block_a_local);
  MPI_Type_free(&Block_b);

  if( NB_groups > 1 ) {
    free(res);
  }
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Comm_free(&my_world);
  MPI_Comm_free(&group_comm);
  MPI_Comm_free(&group_line);
  MPI_Comm_free(&row_comm);
  MPI_Comm_free(&col_comm);

  return time;
}
int main(int argc, char ** argv)
{
  size_t m = 1024, n = 1024, k = 1024;
  size_t NB_Block = 16;
  size_t Block_size = k/NB_Block;
  size_t NB_groups = 1, group = 0, key = 0;
  int myrank;
  int NB_proc;
  int opt = 0;
  size_t row, col, size_row, size_col;  // virtual processor topology
  MPI_Init(&argc, &argv);

  /* Find out my identity in the default communicator */
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &NB_proc);
  /* default topology: take the largest divisor of NB_proc that is at most
   * NB_proc/2 as the column count, then keep the larger factor in size_col */
  for (size_col=NB_proc/2; NB_proc%size_col; size_col--);

  size_row = NB_proc/size_col;
  if (size_row > size_col){
    size_col = size_row;
    size_row = NB_proc/size_col;
  }
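  /* E.g. NB_proc = 12: size_col starts at 6, 12 % 6 == 0, so size_col = 6 and
   * size_row = 2; the checks in two_dot_five then expect every working rank
   * to satisfy row < size_col and col < size_row. */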
  //get the parameters from the command line
  while ((opt = getopt(argc, argv, "hr:c:M:N:K:B:G:g:k:P:")) != -1) {
    switch (opt) {
    case 'h':
      XBT_INFO("Usage: mxm_cblas_test [options]\n"
               " -M I M size (default: %zu)\n"
               " -N I N size (default: %zu)\n"
               " -K I K size (default: %zu)\n"
               " -B I Block size on the k dimension (default: %zu)\n"
               " -G I Number of processor groups (default: %zu)\n"
               " -g I group index (default: %zu)\n"
               " -k I group rank (default: %zu)\n"
               " -r I processor row size (default: %zu)\n"
               " -c I processor col size (default: %zu)\n"
               " -h help\n", m, n, k, Block_size, NB_groups, group, key, size_row, size_col);
      return 0;
    case 'M':
      m = xbt_str_parse_int(optarg, "Invalid M size: %s");
      break;
    case 'N':
      n = xbt_str_parse_int(optarg, "Invalid N size: %s");
      break;
    case 'K':
      k = xbt_str_parse_int(optarg, "Invalid K size: %s");
      break;
    case 'B':
      Block_size = xbt_str_parse_int(optarg, "Invalid block size: %s");
      break;
    case 'G':
      NB_groups = xbt_str_parse_int(optarg, "Invalid number of processor groups: %s");
      break;
    case 'g':
      group = xbt_str_parse_int(optarg, "Invalid group index: %s");
      break;
    case 'k':
      key = xbt_str_parse_int(optarg, "Invalid group rank: %s");
      break;
    case 'r':
      size_row = xbt_str_parse_int(optarg, "Invalid processor row size: %s");
      break;
    case 'c':
      size_col = xbt_str_parse_int(optarg, "Invalid processor col size: %s");
      break;
    }
  }
  two_dot_five(m, k, n, Block_size, group, key, size_row, size_col, NB_groups);

  // close the program properly
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
  return 0;
}