1 /* pmm - parallel matrix multiplication "double diffusion" */
3 /* Copyright (c) 2006-2015. The SimGrid Team.
4 * All rights reserved. */
6 /* This program is free software; you can redistribute it and/or modify it
7 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "simgrid/msg.h"
10 #include "xbt/matrix.h"
11 #include "xbt/xbt_os_time.h"
13 /** @addtogroup MSG_examples
15 * - <b>pmm/msg_pmm.c</b>: Parallel Matrix Multiplication is a little application. This is something that most MPI
16 * developers have written during their class, here implemented using MSG instead of MPI.
19 XBT_LOG_NEW_DEFAULT_CATEGORY(msg_pmm, "Messages specific for this msg example");
21 /* This example should always be executed using a deployment of GRID_SIZE * GRID_SIZE nodes. */
22 #define GRID_SIZE 3 /* Modify to adjust the grid's size */
23 #define NODE_MATRIX_SIZE 300 /* Amount of work done by each node*/
25 #define GRID_NUM_NODES GRID_SIZE * GRID_SIZE
26 #define MATRIX_SIZE NODE_MATRIX_SIZE * GRID_SIZE
27 #define MAILBOX_NAME_SIZE 10
28 #define NEIGHBOURS_COUNT GRID_SIZE - 1
31 * The job sent to every node
33 typedef struct s_node_job{
36 int nodes_in_row[NEIGHBOURS_COUNT];
37 int nodes_in_col[NEIGHBOURS_COUNT];
40 } s_node_job_t, *node_job_t;
43 * Structure for recovering results
45 typedef struct s_result {
49 } s_result_t, *result_t;
51 int node(int argc, char **argv);
52 static void create_jobs(xbt_matrix_t A, xbt_matrix_t B, node_job_t *jobs);
53 static void broadcast_jobs(node_job_t *jobs);
54 static node_job_t wait_job(int selfid);
55 static void broadcast_matrix(xbt_matrix_t M, int num_nodes, int *nodes);
56 static void get_sub_matrix(xbt_matrix_t *sM, int selfid);
57 static void receive_results(result_t *results);
58 static void task_cleanup(void *arg);
60 int node(int argc, char **argv)
62 char my_mbox[MAILBOX_NAME_SIZE];
63 node_job_t myjob, jobs[GRID_NUM_NODES];
64 xbt_matrix_t A, B, C, sA, sB, sC;
67 xbt_assert(argc != 1, "Wrong number of arguments for this node");
69 /* Initialize the node's data-structures */
70 int myid = xbt_str_parse_int(argv[1], "Invalid ID received as first node parameter: %s");
71 snprintf(my_mbox, MAILBOX_NAME_SIZE - 1, "%d", myid);
72 sC = xbt_matrix_double_new_zeros(NODE_MATRIX_SIZE, NODE_MATRIX_SIZE);
75 /* Create the matrices to multiply and one to store the result */
76 A = xbt_matrix_double_new_id(MATRIX_SIZE, MATRIX_SIZE);
77 B = xbt_matrix_double_new_seq(MATRIX_SIZE, MATRIX_SIZE);
78 C = xbt_matrix_double_new_zeros(MATRIX_SIZE, MATRIX_SIZE);
80 /* Create the nodes' jobs */
81 create_jobs(A, B, jobs);
83 /* Get own job first */
86 /* Broadcast the rest of the jobs to the other nodes */
87 broadcast_jobs(jobs + 1);
90 A = B = C = NULL; /* Avoid warning at compilation */
91 myjob = wait_job(myid);
94 /* Multiplication main-loop */
95 XBT_VERB("Start Multiplication's Main-loop");
96 for (int k=0; k < GRID_SIZE; k++){
98 XBT_VERB("Broadcast sA(%d,%d) to row %d", myjob->row, k, myjob->row);
99 broadcast_matrix(myjob->A, NEIGHBOURS_COUNT, myjob->nodes_in_row);
103 XBT_VERB("Broadcast sB(%d,%d) to col %d", k, myjob->col, myjob->col);
104 broadcast_matrix(myjob->B, NEIGHBOURS_COUNT, myjob->nodes_in_col);
107 if(myjob->row == k && myjob->col == k){
108 xbt_matrix_double_addmult(myjob->A, myjob->B, sC);
109 }else if(myjob->row == k){
110 get_sub_matrix(&sA, myid);
111 xbt_matrix_double_addmult(sA, myjob->B, sC);
113 }else if(myjob->col == k){
114 get_sub_matrix(&sB, myid);
115 xbt_matrix_double_addmult(myjob->A, sB, sC);
118 get_sub_matrix(&sA, myid);
119 get_sub_matrix(&sB, myid);
120 xbt_matrix_double_addmult(sA, sB, sC);
126 /* Node 0: gather the results and reconstruct the final matrix */
129 result_t results[GRID_NUM_NODES] = {0};
131 XBT_VERB("Multiplication done.");
133 /* Get the result from the nodes in the GRID */
134 receive_results(results);
136 /* First add our results */
137 xbt_matrix_copy_values(C, sC, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE, 0, 0, 0, 0, NULL);
139 /* Reconstruct the rest of the result matrix */
140 for (node = 1; node < GRID_NUM_NODES; node++){
141 xbt_matrix_copy_values(C, results[node]->sC, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE,
142 NODE_MATRIX_SIZE * results[node]->row, NODE_MATRIX_SIZE * results[node]->col,
144 xbt_matrix_free(results[node]->sC);
145 xbt_free(results[node]);
148 //xbt_matrix_dump(C, "C:res", 0, xbt_matrix_dump_display_double);
154 /* The rest: return the result to node 0 */
158 XBT_VERB("Multiplication done. Send the sub-result.");
160 result = xbt_new0(s_result_t, 1);
161 result->row = myjob->row;
162 result->col = myjob->col;
163 result->sC = xbt_matrix_new_sub(sC, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE, 0, 0, NULL);
164 task = MSG_task_create("result",100,100,result);
165 MSG_task_send(task, "0");
168 /* Clean up and finish*/
170 xbt_matrix_free(myjob->A);
171 xbt_matrix_free(myjob->B);
177 * Broadcast the jobs to the nodes of the grid (except to node 0)
179 static void broadcast_jobs(node_job_t *jobs)
181 char node_mbox[MAILBOX_NAME_SIZE];
182 msg_comm_t comms[GRID_NUM_NODES - 1] = {0};
184 XBT_VERB("Broadcast Jobs");
185 for (int node = 1; node < GRID_NUM_NODES; node++){
186 msg_task_t task = MSG_task_create("Job", 100, 100, jobs[node-1]);
187 snprintf(node_mbox, MAILBOX_NAME_SIZE - 1, "%d", node);
188 comms[node-1] = MSG_task_isend(task, node_mbox);
191 MSG_comm_waitall(comms, GRID_NUM_NODES-1, -1);
192 for (int node = 1; node < GRID_NUM_NODES; node++)
193 MSG_comm_destroy(comms[node - 1]);
196 static node_job_t wait_job(int selfid)
198 msg_task_t task = NULL;
199 char self_mbox[MAILBOX_NAME_SIZE];
200 snprintf(self_mbox, MAILBOX_NAME_SIZE - 1, "%d", selfid);
201 msg_error_t err = MSG_task_receive(&task, self_mbox);
202 xbt_assert(err == MSG_OK, "Error while receiving from %s (%d)", self_mbox, (int)err);
203 node_job_t job = (node_job_t)MSG_task_get_data(task);
204 MSG_task_destroy(task);
205 XBT_VERB("Got Job (%d,%d)", job->row, job->col);
210 static void broadcast_matrix(xbt_matrix_t M, int num_nodes, int *nodes)
212 char node_mbox[MAILBOX_NAME_SIZE];
214 for(int node=0; node < num_nodes; node++){
215 snprintf(node_mbox, MAILBOX_NAME_SIZE - 1, "%d", nodes[node]);
216 xbt_matrix_t sM = xbt_matrix_new_sub(M, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE, 0, 0, NULL);
217 msg_task_t task = MSG_task_create("sub-matrix", 100, 100, sM);
218 MSG_task_dsend(task, node_mbox, task_cleanup);
219 XBT_DEBUG("sub-matrix sent to %s", node_mbox);
223 static void get_sub_matrix(xbt_matrix_t *sM, int selfid)
225 msg_task_t task = NULL;
226 char node_mbox[MAILBOX_NAME_SIZE];
228 XBT_VERB("Get sub-matrix");
230 snprintf(node_mbox, MAILBOX_NAME_SIZE - 1, "%d", selfid);
231 msg_error_t err = MSG_task_receive(&task, node_mbox);
232 xbt_assert(err == MSG_OK, "Error while receiving from %s (%d)", node_mbox, (int)err);
233 *sM = (xbt_matrix_t)MSG_task_get_data(task);
234 MSG_task_destroy(task);
237 static void task_cleanup(void *arg){
238 msg_task_t task = (msg_task_t)arg;
239 xbt_matrix_t m = (xbt_matrix_t)MSG_task_get_data(task);
241 MSG_task_destroy(task);
244 int main(int argc, char *argv[])
246 xbt_os_timer_t timer = xbt_os_timer_new();
248 MSG_init(&argc, argv);
249 MSG_create_environment(argv[1]);
251 MSG_function_register("node", node);
252 for(int i = 0 ; i< 9; i++) {
253 char *hostname = bprintf("node-%d.acme.org", i);
254 char **argvF = xbt_new(char *, 3);
255 argvF[0] = xbt_strdup("node");
256 argvF[1] = bprintf("%d", i);
258 MSG_process_create_with_arguments("node", node, NULL, MSG_host_by_name(hostname), 2, argvF);
262 xbt_os_cputimer_start(timer);
263 msg_error_t res = MSG_main();
264 xbt_os_cputimer_stop(timer);
265 XBT_CRITICAL("Simulated time: %g", MSG_get_clock());
267 return res != MSG_OK;
270 static void create_jobs(xbt_matrix_t A, xbt_matrix_t B, node_job_t *jobs)
272 int row = 0, col = 0;
274 for (int node = 0; node < GRID_NUM_NODES; node++){
275 XBT_VERB("Create job %d", node);
276 jobs[node] = xbt_new0(s_node_job_t, 1);
277 jobs[node]->row = row;
278 jobs[node]->col = col;
280 /* Compute who are the nodes in the same row and column */
281 /* than the node receiving this job */
282 for (int j = 0, k = 0; j < GRID_SIZE; j++) {
283 if (node != (GRID_SIZE * row) + j) {
284 jobs[node]->nodes_in_row[k] = (GRID_SIZE * row) + j;
289 for (int j = 0, k = 0; j < GRID_SIZE; j++) {
290 if (node != (GRID_SIZE * j) + col) {
291 jobs[node]->nodes_in_col[k] = (GRID_SIZE * j) + col;
296 /* Assign a sub matrix of A and B to the job */
298 xbt_matrix_new_sub(A, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE * row, NODE_MATRIX_SIZE * col, NULL);
300 xbt_matrix_new_sub(B, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE, NODE_MATRIX_SIZE * row, NODE_MATRIX_SIZE * col, NULL);
302 if (++col >= GRID_SIZE){
309 static void receive_results(result_t *results) {
310 msg_comm_t comms[GRID_NUM_NODES-1] = {0};
311 msg_task_t tasks[GRID_NUM_NODES-1] = {0};
313 XBT_VERB("Receive Results.");
315 /* Get the result from the nodes in the GRID */
316 for (int node = 1; node < GRID_NUM_NODES; node++)
317 comms[node-1] = MSG_task_irecv(&tasks[node-1], "0");
319 MSG_comm_waitall(comms, GRID_NUM_NODES - 1, -1);
320 for (int node = 1; node < GRID_NUM_NODES; node++)
321 MSG_comm_destroy(comms[node - 1]);
323 /* Reconstruct the result matrix */
324 for (int node = 1; node < GRID_NUM_NODES; node++){
325 results[node] = (result_t)MSG_task_get_data(tasks[node-1]);
326 MSG_task_destroy(tasks[node-1]);