1 /* Copyright (c) 2010-2015. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
14 #include <simgrid/s4u/host.hpp>
17 #include "src/simix/smx_private.h"
// Logging category for SMPI communicator operations.
19 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_comm, smpi, "Logging specific to SMPI (comm)");
// Sentinel communicator object: methods below compare `this` against
// MPI_COMM_UNINITIALIZED and, on match, forward to smpi_process_comm_world().
21 Comm mpi_MPI_COMM_UNINITIALIZED;
22 MPI_Comm MPI_COMM_UNINITIALIZED=&mpi_MPI_COMM_UNINITIALIZED;
24 /* Support for cartesian topology was added, but there are 2 other types of topology: graph and dist graph. In order to
25  * support them, we have to add a field MPIR_Topo_type, and replace the MPI_Topology field by a union. */
// qsort() comparator for the (rank, key) int-pair rankmap built in
// Comm::split().  NOTE(review): the actual comparison statements are elided
// in this excerpt — presumably it orders entries by key; confirm upstream.
27 static int smpi_compare_rankmap(const void *a, const void *b)
29   const int* x = static_cast<const int*>(a);
30   const int* y = static_cast<const int*>(b);
// Process-wide keyval registry; created lazily in attr_put()/keyval_create().
50 xbt_dict_t Comm::keyvals_ = nullptr;
51 int Comm::keyval_id_ = 0;//avoid collisions
// Build a communicator over an existing group and (possibly null) topology.
// Intra/leader sub-communicators and SMP maps start unset; they are filled in
// later by init_smp().  (Some constructor lines are elided in this excerpt.)
53 Comm::Comm(MPI_Group group, MPI_Topology topo) : group_(group), topo_(topo)
56 topoType_ = MPI_INVALID_TOPO;
57 intra_comm_ = MPI_COMM_NULL;
58 leaders_comm_ = MPI_COMM_NULL;
60 non_uniform_map_ = nullptr;
61 leaders_map_ = nullptr;
// Tear down a communicator; the uninitialized sentinel redirects to the
// process's MPI_COMM_WORLD.  Topologies are owned exclusively (no refcount),
// so the topo is deleted here unconditionally.
66 void Comm::destroy(Comm* comm)
68   if (comm == MPI_COMM_UNINITIALIZED){
69     Comm::destroy(smpi_process_comm_world());
72   delete comm->topo_; // there's no use count on topos
// MPI_Comm_dup: clone the group into a fresh communicator sharing the same
// topology pointer, then copy each attribute through its registered copy_fn.
// Returns MPI_SUCCESS or the first error reported by a copy callback.
76 int Comm::dup(MPI_Comm* newcomm){
77 if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables
78 smpi_switch_data_segment(smpi_process_index());
// Deep-copy the group (new refcounted Group object).
80 MPI_Group cp = new Group(this->group());
81 (*newcomm) = new Comm(cp, this->topo());
82 int ret = MPI_SUCCESS;
84 if(attributes_ !=nullptr){
85 (*newcomm)->attributes_ = xbt_dict_new_homogeneous(nullptr);
86 xbt_dict_cursor_t cursor = nullptr;
// Run the user copy callback for each attribute (skipping MPI_NULL_COPY_FN);
// on failure, destroy the half-built duplicate and bail out.
91 xbt_dict_foreach (attributes_, cursor, key, value_in) {
92 smpi_comm_key_elem elem =
93 static_cast<smpi_comm_key_elem>(xbt_dict_get_or_null_ext(keyvals_, key, sizeof(int)));
94 if (elem != nullptr && elem->copy_fn != MPI_NULL_COPY_FN) {
95 ret = elem->copy_fn(this, atoi(key), nullptr, value_in, &value_out, &flag);
96 if (ret != MPI_SUCCESS) {
97 Comm::destroy(*newcomm);
98 *newcomm = MPI_COMM_NULL;
99 xbt_dict_cursor_free(&cursor);
// NOTE(review): the error-return / flag check between lines 99 and 103 is
// elided in this excerpt.
103 xbt_dict_set_ext((*newcomm)->attributes_, key, sizeof(int), value_out, nullptr);
// Simple accessors.  Each forwards to the process's MPI_COMM_WORLD when
// called on the MPI_COMM_UNINITIALIZED sentinel.
110 MPI_Group Comm::group()
112   if (this == MPI_COMM_UNINITIALIZED)
113     return smpi_process_comm_world()->group();
117 MPI_Topology Comm::topo() {
// NOTE(review): the lines below belong to Comm::size(); its signature
// (original line ~122) is elided in this excerpt.
123   if (this == MPI_COMM_UNINITIALIZED)
124     return smpi_process_comm_world()->size();
125   return group_->size();
// NOTE(review): the lines below belong to Comm::rank(); rank of the calling
// process, resolved through its global smpi index.
130   if (this == MPI_COMM_UNINITIALIZED)
131     return smpi_process_comm_world()->rank();
132   return group_->rank(smpi_process_index());
// MPI_Comm_get_name: "WORLD" for MPI_COMM_WORLD, otherwise the communicator's
// address formatted as a pointer string.  NOTE(review): strncpy with length 5
// does NOT write the terminating NUL for "WORLD"; elided lines may handle
// *len / termination — confirm upstream.
135 void Comm::get_name (char* name, int* len)
137   if (this == MPI_COMM_UNINITIALIZED){
138     smpi_process_comm_world()->get_name(name, len);
141   if(this == MPI_COMM_WORLD) {
142     strncpy(name, "WORLD",5);
145     *len = snprintf(name, MPI_MAX_NAME_STRING, "%p", this);
// Record the inter-node "leaders" communicator built by init_smp().
149 void Comm::set_leaders_comm(MPI_Comm leaders){
150   if (this == MPI_COMM_UNINITIALIZED){
151     smpi_process_comm_world()->set_leaders_comm(leaders);
154   leaders_comm_=leaders;
// Setter for the intra-node communicator (body elided in this excerpt).
// NOTE(review): the parameter name `leaders` looks like a copy-paste slip —
// it stores an intra communicator.
157 void Comm::set_intra_comm(MPI_Comm leaders){
// Per-leader process-count map (nullptr when nodes are uniform); see init_smp().
161 int* Comm::get_non_uniform_map(){
162   if (this == MPI_COMM_UNINITIALIZED)
163     return smpi_process_comm_world()->get_non_uniform_map();
164   return non_uniform_map_;
// Map from each rank to its node leader's index; built in init_smp().
167 int* Comm::get_leaders_map(){
168   if (this == MPI_COMM_UNINITIALIZED)
169     return smpi_process_comm_world()->get_leaders_map();
// Communicator containing one leader process per SMP node.
173 MPI_Comm Comm::get_leaders_comm(){
174   if (this == MPI_COMM_UNINITIALIZED)
175     return smpi_process_comm_world()->get_leaders_comm();
176   return leaders_comm_;
// Intra-node communicator; for MPI_COMM_WORLD it is stored per-process
// (smpi_process_get_comm_intra) rather than on the communicator itself.
179 MPI_Comm Comm::get_intra_comm(){
180   if (this == MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD)
181     return smpi_process_get_comm_intra();
182   else return intra_comm_;
// Whether every SMP node runs the same number of processes (set by init_smp()).
185 int Comm::is_uniform(){
186   if (this == MPI_COMM_UNINITIALIZED)
187     return smpi_process_comm_world()->is_uniform();
// Whether ranks are allocated contiguously on the SMP nodes (set by init_smp()).
191 int Comm::is_blocked(){
192   if (this == MPI_COMM_UNINITIALIZED)
193     return smpi_process_comm_world()->is_blocked();
// MPI_Comm_split: rank 0 gathers every process's (color, key), partitions the
// ranks by color, orders each partition by key, builds the per-color groups
// and sends each member its group; everyone else receives theirs.  Returns
// the new communicator, or MPI_COMM_NULL for color == MPI_UNDEFINED.
// (Several lines — loop closings, count updates, recv-side setup — are
// elided in this excerpt.)
197 MPI_Comm Comm::split(int color, int key)
199   if (this == MPI_COMM_UNINITIALIZED)
200     return smpi_process_comm_world()->split(color, key);
// Fixed internal tag for the group-distribution messages below.
201   int system_tag = 123;
204   MPI_Group group_root = nullptr;
205   MPI_Group group_out = nullptr;
206   MPI_Group group = this->group();
207   int rank = this->rank();
208   int size = this->size();
209   /* Gather all colors and keys on rank 0 */
210   int* sendbuf = xbt_new(int, 2);
214     recvbuf = xbt_new(int, 2 * size);
218   Coll_gather_default::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this);
220   /* Do the actual job */
222     MPI_Group* group_snd = xbt_new(MPI_Group, size);
223     int* rankmap = xbt_new(int, 2 * size);
// For each not-yet-consumed color, collect all ranks sharing it into rankmap
// (marking them consumed with MPI_UNDEFINED), then sort members by key.
224     for (int i = 0; i < size; i++) {
225       if (recvbuf[2 * i] != MPI_UNDEFINED) {
227         for (int j = i + 1; j < size; j++) {
228           if(recvbuf[2 * i] == recvbuf[2 * j]) {
229             recvbuf[2 * j] = MPI_UNDEFINED;
230             rankmap[2 * count] = j;
231             rankmap[2 * count + 1] = recvbuf[2 * j + 1];
235         /* Add self in the group */
236         recvbuf[2 * i] = MPI_UNDEFINED;
237         rankmap[2 * count] = i;
238         rankmap[2 * count + 1] = recvbuf[2 * i + 1];
240         qsort(rankmap, count, 2 * sizeof(int), &smpi_compare_rankmap);
241         group_out = new Group(count);
243           group_root = group_out; /* Save root's group */
245         for (int j = 0; j < count; j++) {
246           int index = group->index(rankmap[2 * j]);
247           group_out->set_mapping(index, j);
// Ship the freshly built group to every non-root member of this color.
249         MPI_Request* requests = xbt_new(MPI_Request, count);
251         for (int j = 0; j < count; j++) {
252           if(rankmap[2 * j] != 0) {
253             group_snd[reqs]=new Group(group_out);
254             requests[reqs] = Request::isend(&(group_snd[reqs]), 1, MPI_PTR, rankmap[2 * j], system_tag, this);
// Root keeps its own copy; drop the extra reference unless the group is one
// of the shared singletons.
259         if(group_out != MPI_COMM_WORLD->group() && group_out != MPI_GROUP_EMPTY)
260           Group::unref(group_out);
262         Request::waitall(reqs, requests, MPI_STATUS_IGNORE);
269     group_out = group_root; /* exit with root's group */
// Non-root members with a real color block here for their group.
271     if(color != MPI_UNDEFINED) {
272       Request::recv(&group_out, 1, MPI_PTR, 0, system_tag, this, MPI_STATUS_IGNORE);
273     } /* otherwise, exit with group_out == nullptr */
275   return group_out!=nullptr ? new Comm(group_out, nullptr) : MPI_COMM_NULL;
// NOTE(review): fragment of Comm::ref() — its signature (original line ~277)
// is elided in this excerpt.  Redirects to MPI_COMM_WORLD for the sentinel.
279   if (this == MPI_COMM_UNINITIALIZED){
280     smpi_process_comm_world()->ref();
// Run each attribute's registered delete_fn (if any), then free the
// attribute dictionary itself.
287 void Comm::cleanup_attributes(){
288   if(attributes_ !=nullptr){
289     xbt_dict_cursor_t cursor = nullptr;
293     xbt_dict_foreach (attributes_, cursor, key, value) {
294       smpi_comm_key_elem elem = static_cast<smpi_comm_key_elem>(xbt_dict_get_or_null(keyvals_, key));
295       if (elem != nullptr && elem->delete_fn != nullptr)
296         elem->delete_fn(this, atoi(key), value, &flag);
298     xbt_dict_free(&attributes_);
// Release the SMP-related state built by init_smp(): the intra-node and
// leaders communicators (refcounted) and the two malloc'd maps.
302 void Comm::cleanup_smp(){
303   if (intra_comm_ != MPI_COMM_NULL)
304     Comm::unref(intra_comm_);
305   if (leaders_comm_ != MPI_COMM_NULL)
306     Comm::unref(leaders_comm_);
307   if (non_uniform_map_ != nullptr)
308     xbt_free(non_uniform_map_);
309   if (leaders_map_ != nullptr)
310     xbt_free(leaders_map_);
// Drop one reference; always releases the group reference, and when the
// count hits zero runs attribute cleanup (elided lines presumably decrement
// refcount_ and free the object — confirm upstream).
313 void Comm::unref(Comm* comm){
314   if (comm == MPI_COMM_UNINITIALIZED){
315     Comm::unref(smpi_process_comm_world());
319   Group::unref(comm->group_);
321   if(comm->refcount_==0){
323     comm->cleanup_attributes();
/* qsort() comparator for plain ints: three-way result (-1, 0, or 1). */
static int compare_ints (const void *a, const void *b)
{
  const int lhs = *static_cast<const int*>(a);
  const int rhs = *static_cast<const int*>(b);
  if (lhs < rhs)
    return -1;
  return lhs > rhs ? 1 : 0;
}
// Discover the SMP topology of this communicator: build the intra-node
// communicator, elect per-node leaders (minimum global index on each host),
// build the leaders communicator, and compute the is_uniform_ / is_blocked_
// flags used by SMP-aware collectives.  (Many lines are elided in this
// excerpt: loop closings, leader election details, flag initializations.)
336 void Comm::init_smp(){
339   if (this == MPI_COMM_UNINITIALIZED)
340     smpi_process_comm_world()->init_smp();
342   int comm_size = this->size();
344   // If we are in replay - perform an ugly hack
345   // tell SimGrid we are not in replay for a while, because we need the buffers to be copied for the following calls
346   bool replaying = false; //cache data to set it back again after
347   if(smpi_process_get_replaying()){
349     smpi_process_set_replaying(false);
352   if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables
353     smpi_switch_data_segment(smpi_process_index());
355   //identify neighbours in comm
356   //get the indexes of all processes sharing the same simix host
357   xbt_swag_t process_list = SIMIX_host_self()->extension<simgrid::simix::Host>()->process_list;
358   int intra_comm_size = 0;
359   int min_index = INT_MAX;//the minimum index will be the leader
360   smx_actor_t actor = nullptr;
// First pass over co-hosted actors: count members of this comm on the node
// and track the smallest index (the future leader).
361   xbt_swag_foreach(actor, process_list) {
362     int index = actor->pid -1;
364     if(this->group()->rank(index)!=MPI_UNDEFINED){
366       //the process is in the comm
367       if(index < min_index)
371   XBT_DEBUG("number of processes deployed on my node : %d", intra_comm_size);
372   MPI_Group group_intra = new Group(intra_comm_size);
// Second pass: populate the intra-node group with the members found above.
375   xbt_swag_foreach(actor, process_list) {
376     int index = actor->pid -1;
377     if(this->group()->rank(index)!=MPI_UNDEFINED){
378       group_intra->set_mapping(index, i);
383   MPI_Comm comm_intra = new Comm(group_intra, nullptr);
// Exchange every rank's leader index, then derive the deduplicated, sorted
// leader list.
386   int * leaders_map= static_cast<int*>(xbt_malloc0(sizeof(int)*comm_size));
387   int * leader_list= static_cast<int*>(xbt_malloc0(sizeof(int)*comm_size));
388   for(i=0; i<comm_size; i++){
392   Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
394   if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables
395     smpi_switch_data_segment(smpi_process_index());
398   if(leaders_map_==nullptr){
399     leaders_map_= leaders_map;
401     xbt_free(leaders_map);
404   int leader_group_size = 0;
405   for(i=0; i<comm_size; i++){
407     for(j=0;j<leader_group_size; j++){
408       if(leaders_map_[i]==leader_list[j]){
413     leader_list[leader_group_size]=leaders_map_[i];
417   qsort(leader_list, leader_group_size, sizeof(int),compare_ints);
419   MPI_Group leaders_group = new Group(leader_group_size);
421   MPI_Comm leader_comm = MPI_COMM_NULL;
// For a sub-communicator, store leaders/intra comms on `this`; for
// MPI_COMM_WORLD they are kept per-process instead (see the else branch).
422   if(MPI_COMM_WORLD!=MPI_COMM_UNINITIALIZED && this!=MPI_COMM_WORLD){
423     //create leader_communicator
424     for (i=0; i< leader_group_size;i++)
425       leaders_group->set_mapping(leader_list[i], i);
426     leader_comm = new Comm(leaders_group, nullptr);
427     this->set_leaders_comm(leader_comm);
428     this->set_intra_comm(comm_intra);
430     //create intracommunicator
432     for (i=0; i< leader_group_size;i++)
433       leaders_group->set_mapping(leader_list[i], i);
435     if(this->get_leaders_comm()==MPI_COMM_NULL){
436       leader_comm = new Comm(leaders_group, nullptr);
437       this->set_leaders_comm(leader_comm);
439       leader_comm=this->get_leaders_comm();
440       Group::unref(leaders_group);
442     smpi_process_set_comm_intra(comm_intra);
// Uniformity check: node leaders allgather their local sizes and compare.
447   // Are the nodes uniform ? = same number of process/node
448   int my_local_size=comm_intra->size();
449   if(comm_intra->rank()==0) {
450     int* non_uniform_map = xbt_new0(int,leader_group_size);
451     Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT,
452         non_uniform_map, 1, MPI_INT, leader_comm);
453     for(i=0; i < leader_group_size; i++) {
454       if(non_uniform_map[0] != non_uniform_map[i]) {
459     if(is_uniform==0 && this->is_uniform()!=0){
460       non_uniform_map_= non_uniform_map;
462       xbt_free(non_uniform_map);
464     is_uniform_=is_uniform;
466   Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra );
468   if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables
469     smpi_switch_data_segment(smpi_process_index());
// Blocked check: are this comm's ranks contiguous within each node?
// Combined across all ranks with a logical-AND allreduce.
471   // Are the ranks blocked ? = allocated contiguously on the SMP nodes
473   int prev=this->group()->rank(comm_intra->group()->index(0));
474   for (i=1; i<my_local_size; i++){
475     int that=this->group()->rank(comm_intra->group()->index(i));
484   Coll_allreduce_default::allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this);
486   if(MPI_COMM_WORLD==MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD){
488     is_blocked_=global_blocked;
491     is_blocked_=global_blocked;
493   xbt_free(leader_list);
// Restore replay mode if it was temporarily disabled at the top.
496     smpi_process_set_replaying(true);
// MPI_Comm_delete_attr: run the keyval's delete callback (if not the null
// one) on the current value, then remove the entry from this communicator's
// attribute dictionary.  (Error-path lines are elided in this excerpt.)
499 int Comm::attr_delete(int keyval){
500   smpi_comm_key_elem elem =
501     static_cast<smpi_comm_key_elem>(xbt_dict_get_or_null_ext(keyvals_, reinterpret_cast<const char*>(&keyval), sizeof(int)));
504   if(elem->delete_fn!=MPI_NULL_DELETE_FN){
505     void* value = nullptr;
507     if(this->attr_get(keyval, &value, &flag)==MPI_SUCCESS){
508       int ret = elem->delete_fn(this, keyval, value, &flag);
513   if(attributes_==nullptr)
516   xbt_dict_remove_ext(attributes_, reinterpret_cast<const char*>(&keyval), sizeof(int));
// MPI_Comm_get_attr: look up `keyval` in this communicator's attributes;
// stores the value through attr_value (as void**) and sets *flag.  (Flag
// handling and not-found paths are elided in this excerpt.)
520 int Comm::attr_get(int keyval, void* attr_value, int* flag){
521   smpi_comm_key_elem elem =
522     static_cast<smpi_comm_key_elem>(xbt_dict_get_or_null_ext(keyvals_, reinterpret_cast<const char*>(&keyval), sizeof(int)));
525   if(attributes_==nullptr){
530     *static_cast<void**>(attr_value) =
531       xbt_dict_get_ext(attributes_, reinterpret_cast<const char*>(&keyval), sizeof(int));
// MPI_Comm_set_attr: if a value is already stored for this keyval, invoke its
// delete callback first, then (re)insert attr_value.  Dictionaries are
// created lazily on first use.
540 int Comm::attr_put(int keyval, void* attr_value){
541   if(keyvals_==nullptr)
542     keyvals_ = xbt_dict_new_homogeneous(nullptr);
543   smpi_comm_key_elem elem =
544     static_cast<smpi_comm_key_elem>(xbt_dict_get_or_null_ext(keyvals_, reinterpret_cast<const char*>(&keyval), sizeof(int)));
548   void* value = nullptr;
549   this->attr_get(keyval, &value, &flag);
550   if(flag!=0 && elem->delete_fn!=MPI_NULL_DELETE_FN){
551     int ret = elem->delete_fn(this, keyval, value, &flag);
555   if(attributes_==nullptr)
556     attributes_ = xbt_dict_new_homogeneous(nullptr);
558   xbt_dict_set_ext(attributes_, reinterpret_cast<const char*>(&keyval), sizeof(int), attr_value, nullptr);
// Translate a Fortran integer handle back to a C communicator.  Special ids
// map to MPI_COMM_SELF / MPI_COMM_WORLD (the guarding conditions are elided
// in this excerpt); other ids are resolved via the F2C lookup dictionary.
562 MPI_Comm Comm::f2c(int id) {
564     return MPI_COMM_SELF;
566     return MPI_COMM_WORLD;
567   } else if(F2C::f2c_lookup_ != nullptr && id >= 0) {
569     MPI_Comm tmp = static_cast<MPI_Comm>(xbt_dict_get_or_null(F2C::f2c_lookup_,get_key_id(key, id)));
570     return tmp != nullptr ? tmp : MPI_COMM_NULL ;
572   return MPI_COMM_NULL;
// Drop the Fortran handle `id` from the F2C lookup table (id 0 uses the
// plain key form reserved for MPI_COMM_WORLD).
576 void Comm::free_f(int id) {
578   xbt_dict_remove(F2C::f2c_lookup_, id==0? get_key(key, id) : get_key_id(key, id));
// NOTE(review): the lines below belong to Comm::add_f(); its signature
// (original line ~581) is elided.  Registers this comm under a fresh Fortran
// id and returns it.
582   if(F2C::f2c_lookup_==nullptr){
583     F2C::f2c_lookup_=xbt_dict_new_homogeneous(nullptr);
586   xbt_dict_set(F2C::f2c_lookup_, this==MPI_COMM_WORLD? get_key(key, F2C::f2c_id_) : get_key_id(key,F2C::f2c_id_), this, nullptr);
588   return F2C::f2c_id_-1;
// MPI_Comm_create_keyval: allocate a key element holding the user's copy and
// delete callbacks, hand back a fresh id in *keyval, and register it in the
// process-wide keyvals_ dictionary (created lazily).
591 int Comm::keyval_create(MPI_Comm_copy_attr_function* copy_fn, MPI_Comm_delete_attr_function* delete_fn, int* keyval,
593   if(keyvals_==nullptr)
594     keyvals_ = xbt_dict_new_homogeneous(nullptr);
596   smpi_comm_key_elem value = static_cast<smpi_comm_key_elem>(xbt_new0(s_smpi_mpi_comm_key_elem_t,1));
598   value->copy_fn=copy_fn;
599   value->delete_fn=delete_fn;
601   *keyval = keyval_id_;
602   xbt_dict_set_ext(keyvals_, reinterpret_cast<const char*>(keyval), sizeof(int),static_cast<void*>(value), nullptr);
// MPI_Comm_free_keyval: look up the key element and remove it from the
// registry.  (Validation and the element's deallocation are elided in this
// excerpt.)
607 int Comm::keyval_free(int* keyval){
608   smpi_comm_key_elem elem =
609     static_cast<smpi_comm_key_elem>(xbt_dict_get_or_null_ext(keyvals_, reinterpret_cast<const char*>(keyval), sizeof(int)));
612   xbt_dict_remove_ext(keyvals_, reinterpret_cast<const char*>(keyval), sizeof(int));
// Process-teardown hook: free the whole keyval registry if it was created.
617 void Comm::keyval_cleanup(){
618   if(Comm::keyvals_!=nullptr)
619     xbt_dict_free(&Comm::keyvals_);