+ /*-------------------Configuration for Summa algorithm--------------------*/
+ /*--------------------Allocation of matrices block-------------------------*/
+ /* Scratch buffers for the A / B panels circulated during the SUMMA steps;
+  * ownership is local — freed at the bottom of this function. */
+ double *a_Summa, *b_Summa;
+ blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n);
+
+ /*--------------------Communication types for MPI--------------------------*/
+ /* Block_a:       column panel of A — m rows of Block_size doubles, strided
+  *                by the full row length k_a (a sub-block of the big A).
+  * Block_a_local: same panel but contiguous (stride == block length), used
+  *                for the local receive buffer a_Summa.
+  * Block_b:       row panel of B — Block_size rows of n doubles; stride n
+  *                makes it effectively contiguous. */
+ MPI_Datatype Block_a;
+ MPI_Datatype Block_a_local;
+ MPI_Datatype Block_b;
+ MPI_Type_vector(m , Block_size, k_a, MPI_DOUBLE, &Block_a);
+ MPI_Type_vector(m , Block_size, Block_size, MPI_DOUBLE, &Block_a_local);
+ MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b);
+ MPI_Type_commit(&Block_a);
+ MPI_Type_commit(&Block_a_local);
+ MPI_Type_commit(&Block_b);
+ /*-------------Communication types for MPI are configured------------------*/
+
+ /* 2.5D replication step: when several groups exist, group 0's copies of A
+  * and B are broadcast down the "line" communicator so every group holds
+  * the full operands before running its share of the SUMMA steps. */
+ MPI_Barrier(my_world);
+ start_time = MPI_Wtime();
+ if( NB_groups > 1 ) {
+ err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line);
+ if (err != MPI_SUCCESS) {
+ /* NOTE(review): perror() prints strerror(errno), but MPI errors do not
+  * set errno — the suffix here is unrelated noise. fprintf(stderr, ...)
+  * or MPI_Error_string() would be the right reporting call. */
+ perror("Error Bcast A\n");
+ return -1;
+ }
+ err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line);
+ if (err != MPI_SUCCESS) {
+ perror("Error Bcast B\n");
+ return -1;
+ }
+ MPI_Barrier(my_world);
+ }
+ /* Accumulate the replication cost; start_time is NOT reset afterwards, so
+  * `time` below (end_time - start_time) spans bcast + SUMMA + reduce. */
+ end_time_intern = MPI_Wtime();
+ communication_time += end_time_intern - start_time;
+
+ /* Debug trace of the per-rank decomposition parameters.
+  * NOTE(review): %zu assumes every argument is size_t — confirm the
+  * declarations of group, row, col, size_row, size_col match. */
+ XBT_INFO( "group %zu NB_block: %zu, NB_groups %zu\n",group,NB_Block, NB_groups);
+ XBT_INFO("m %zu, k_a %zu, k_b %zu, n %zu, Block_size %zu, group*NB_Block/NB_groups %zu, "
+ "(group+1)*NB_Block/NB_groups %zu, row %zu, col %zu, size_row %zu, size_col %zu\n",m, k_a, k_b, n,
+ Block_size, group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,row, col, size_row, size_col);
+
+
+ /* Each group computes its contiguous slice [group*NB_Block/NB_groups,
+  * (group+1)*NB_Block/NB_groups) of the SUMMA outer-product steps into c. */
+ Summa(a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size, group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
+ row, col, size_row, size_col, a_Summa, b_Summa, Block_a, Block_a_local, Block_b, row_comm, col_comm, 0);
+
+ /*-------------------------End Summa algorithm-----------------------------*/
+
+ MPI_Comm_rank(group_line, &myrank);
+
+ /* Combine the partial products: ranks along group_line each hold a partial
+  * sum of c, reduced into res on line-rank 0. */
+ MPI_Barrier(my_world);
+ start_time_reduce = MPI_Wtime();
+ if( NB_groups > 1 ) {
+ // a gather is better?
+ err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line);
+ if (err != MPI_SUCCESS) {
+ /* NOTE(review): copy-paste bug — this is the Reduce failure path but the
+  * message still says "Bcast A". Should read e.g. "Error Reduce C". */
+ perror("Error Bcast A\n");
+ return -1;
+ }
+ }else{
+ /* Single group: no reduction needed — expose c's contents through res by
+  * swapping the pointers.
+  * NOTE(review): after this swap, free(res) below releases the ORIGINAL c
+  * buffer, while the original res buffer (now in c) is only freed when
+  * NB_groups > 1 — so it appears to leak in this branch; verify. */
+ double *swap= c;
+ c = res;
+ res=swap;
+ }
+ MPI_Barrier(my_world);
+ end_time_reduce = MPI_Wtime();
+
+ MPI_Barrier(my_world);
+ end_time = MPI_Wtime();
+ /* Total elapsed since the barrier before the broadcast (includes SUMMA). */
+ time = end_time - start_time;
+ double reduce_time = end_time_reduce - start_time_reduce;
+ printf("communication time: %e reduce time: %e seconds, total time: %e seconds\n", communication_time, reduce_time,
+ time);
+ MPI_Barrier(my_world);
+
+#if CHECK_25D
+ /* Optional correctness check on the line-root only (res holds the sum). */
+ if(myrank == 0)
+ check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col);
+#endif
+
+ // close properly the program: release datatypes, buffers and communicators
+ MPI_Type_free(&Block_a);
+ MPI_Type_free(&Block_a_local);
+ MPI_Type_free(&Block_b);
+
+ free(a_Summa);
+ free(b_Summa);
+
+ free( a );
+ free( b );
+ if( NB_groups > 1 ) {
+ free( c );
+ }
+ free(res);
+
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Comm_free(&my_world);
+ MPI_Comm_free(&group_comm);
+ MPI_Comm_free(&group_line);
+ MPI_Comm_free(&row_comm);
+ MPI_Comm_free(&col_comm);
+ return 0;
+}