From e9969d7016d2fdfc32e95b14472d2b7c78653acb Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Mon, 20 Jan 2014 14:20:43 +0100 Subject: [PATCH] add handling of the torus topology inside clusters --- src/include/surf/surfxml_parse_values.h | 2 - src/surf/surf_routing.cpp | 188 +++++++++++++++++++----- src/surf/surf_routing_cluster.cpp | 35 +++-- src/surf/surf_routing_cluster.hpp | 4 + 4 files changed, 177 insertions(+), 52 deletions(-) diff --git a/src/include/surf/surfxml_parse_values.h b/src/include/surf/surfxml_parse_values.h index 93b09e3803..f9f2386b57 100644 --- a/src/include/surf/surfxml_parse_values.h +++ b/src/include/surf/surfxml_parse_values.h @@ -12,8 +12,6 @@ typedef struct s_surf_parsing_link_up_down *surf_parsing_link_up_down_t; typedef struct s_surf_parsing_link_up_down { void* link_up; void* link_down; - void* limiter_link; - void* loopback_link; } s_surf_parsing_link_up_down_t; #endif /* SURFXML_PARSE_VALUES_H_ */ diff --git a/src/surf/surf_routing.cpp b/src/surf/surf_routing.cpp index 589c475b98..60fcd0fc8f 100644 --- a/src/surf/surf_routing.cpp +++ b/src/surf/surf_routing.cpp @@ -121,8 +121,6 @@ static void parse_S_host_link(sg_platf_host_link_cbarg_t host) s_surf_parsing_link_up_down_t link_up_down; link_up_down.link_up = xbt_lib_get_or_null(link_lib, host->link_up, SURF_LINK_LEVEL); link_up_down.link_down = xbt_lib_get_or_null(link_lib, host->link_down, SURF_LINK_LEVEL); - link_up_down.limiter_link = NULL; - link_up_down.loopback_link = NULL; xbt_assert(link_up_down.link_up, "Link '%s' not found!",host->link_up); xbt_assert(link_up_down.link_down, "Link '%s' not found!",host->link_down); @@ -800,11 +798,13 @@ static void routing_parse_cluster(sg_platf_cluster_cbarg_t cluster) { char *host_id, *groups, *link_id = NULL; xbt_dict_t patterns = NULL; + xbt_dynar_t dimensions; + int rankId=0; s_sg_platf_host_cbarg_t host; s_sg_platf_link_cbarg_t link; - unsigned int iter; + unsigned int iter, totalRanks=0; int start, end, i; xbt_dynar_t 
radical_elements; xbt_dynar_t radical_ends; @@ -817,6 +817,43 @@ static void routing_parse_cluster(sg_platf_cluster_cbarg_t cluster) xbt_dict_set(patterns, "suffix", xbt_strdup(cluster->suffix), NULL); } + dimensions = xbt_str_split(cluster->torus_dimensions, ","); + + + int nb_links_per_node = 1; + + if (!xbt_dynar_is_empty(dimensions)) { + /** + * We are in a torus cluster + * Parse attribute dimensions="dim1,dim2,dim3,...,dimN" + * and save it in a dynarray. + * Additionally, we need to know how many ranks we have in total + */ + xbt_dynar_foreach(dimensions, iter, groups) { + int tmp = surf_parse_get_int(xbt_dynar_get_as(dimensions, iter, char *)); + xbt_dynar_set_as(dimensions, iter, int, tmp); + + if (totalRanks == 0) + totalRanks = tmp; + else + totalRanks *= tmp; + } + + nb_links_per_node = xbt_dynar_length(dimensions); + ((AsClusterPtr)current_routing)-> p_dimensions = dimensions; + + } + + if(cluster->loopback_bw!=0 || cluster->loopback_lat!=0){ + nb_links_per_node++; + ((AsClusterPtr)current_routing)->p_has_loopback=1; + } + + if(cluster->limiter_link!=0){ + nb_links_per_node++; + ((AsClusterPtr)current_routing)->p_has_limiter=1; + } + XBT_DEBUG("", cluster->id); s_sg_platf_AS_cbarg_t AS = SG_PLATF_AS_INITIALIZER; AS.id = cluster->id; @@ -888,30 +925,37 @@ static void routing_parse_cluster(sg_platf_cluster_cbarg_t cluster) XBT_DEBUG("", link_id, cluster->bw, cluster->lat); - memset(&link, 0, sizeof(link)); - link.id = link_id; - link.bandwidth = cluster->bw; - link.latency = cluster->lat; - link.state = SURF_RESOURCE_ON; - link.policy = cluster->sharing_policy; - sg_platf_new_link(&link); - s_surf_parsing_link_up_down_t info; + s_surf_parsing_link_up_down_t info, info_lim, info_loop; + // All links are saved in a matrix; + // every row describes a single node; every node + // may have multiple links. 
+ // the first column may store a link from x to x if p_has_loopback is set + // the second column may store a limiter link if p_has_limiter is set + // other columns are to store one or more link, if we are in a torus - if (link.policy == SURF_LINK_FULLDUPLEX) { - char *tmp_link = bprintf("%s_UP", link_id); - info.link_up = - xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); - free(tmp_link); - tmp_link = bprintf("%s_DOWN", link_id); - info.link_down = + //add a loopback link + if(cluster->loopback_bw!=0 || cluster->loopback_lat!=0){ + char *tmp_link = bprintf("%s_loopback", link_id); + XBT_DEBUG("", tmp_link, + cluster->limiter_link); + + + memset(&link, 0, sizeof(link)); + link.id = tmp_link; + link.bandwidth = cluster->loopback_bw; + link.latency = cluster->loopback_lat; + link.state = SURF_RESOURCE_ON; + link.policy = SURF_LINK_FATPIPE; + sg_platf_new_link(&link); + info_loop.link_up = xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); + info_loop.link_down = info.link_up; free(tmp_link); - } else { - info.link_up = xbt_lib_get_or_null(link_lib, link_id, SURF_LINK_LEVEL); - info.link_down = info.link_up; + xbt_dynar_set(current_routing->p_linkUpDownList, rankId*nb_links_per_node, &info_loop); } + //add a limiter link (shared link to account for maximal bandwidth of the node) if(cluster->limiter_link!=0){ char *tmp_link = bprintf("%s_limiter", link_id); XBT_DEBUG("", tmp_link, @@ -925,36 +969,104 @@ static void routing_parse_cluster(sg_platf_cluster_cbarg_t cluster) link.state = SURF_RESOURCE_ON; link.policy = SURF_LINK_SHARED; sg_platf_new_link(&link); - info.limiter_link = + info_lim.link_up = xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); + info_lim.link_down = info.link_up; free(tmp_link); - }else{ - info.limiter_link =NULL; + xbt_dynar_set(current_routing->p_linkUpDownList, + rankId*nb_links_per_node + ((AsClusterPtr)current_routing)->p_has_loopback , + &info_lim); + } - if(cluster->loopback_bw!=0 || cluster->loopback_lat!=0){ - 
char *tmp_link = bprintf("%s_loopback", link_id); - XBT_DEBUG("", tmp_link, - cluster->limiter_link); + if(xbt_dynar_length(dimensions) == 0 ) { + /** + * If torus is not specified, generate one link per node + */ + memset(&link, 0, sizeof(link)); - link.id = tmp_link; - link.bandwidth = cluster->loopback_bw; - link.latency = cluster->loopback_lat; + link.id = link_id; + link.bandwidth = cluster->bw; + link.latency = cluster->lat; link.state = SURF_RESOURCE_ON; - link.policy = SURF_LINK_FATPIPE; + link.policy = cluster->sharing_policy; sg_platf_new_link(&link); - info.loopback_link = - xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); - free(tmp_link); + + if (link.policy == SURF_LINK_FULLDUPLEX) { + char *tmp_link = bprintf("%s_UP", link_id); + info.link_up = + xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); + free(tmp_link); + tmp_link = bprintf("%s_DOWN", link_id); + info.link_down = + xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); + free(tmp_link); + } else { + info.link_up = xbt_lib_get_or_null(link_lib, link_id, SURF_LINK_LEVEL); + info.link_down = info.link_up; + } + xbt_dynar_set(current_routing->p_linkUpDownList, rankId*nb_links_per_node + + ((AsClusterPtr)current_routing)->p_has_loopback + + ((AsClusterPtr)current_routing)->p_has_limiter, + &info); }else{ - info.loopback_link =NULL; - } - xbt_dynar_push(current_routing->p_linkUpDownList, &info); + unsigned int j = 0; + /** + * Create all links that exist in the torus. + * Each rank creates one link per dimension + */ + int neighbour_rank_id = 0; // The other node the link connects + int current_dimension = 0, // which dimension are we currently in? 
+ // we need to iterate over all dimensions + // and create all links there + dim_product = 1; // Needed to calculate the next neighbour_id + for (j = 0; j < xbt_dynar_length(dimensions); j++) { + + memset(&link, 0, sizeof(link)); + current_dimension = xbt_dynar_get_as(dimensions, j, int); + neighbour_rank_id = ( ((int) i / dim_product) % current_dimension == current_dimension-1) ? i - (current_dimension-1)*dim_product : i + dim_product; + link_id = bprintf("link_from_%i_to_%i", i, neighbour_rank_id); + link.id = link_id; + link.bandwidth = cluster->bw; + link.latency = cluster->lat; + link.state = SURF_RESOURCE_ON; + link.policy = cluster->sharing_policy; + sg_platf_new_link(&link); + s_surf_parsing_link_up_down_t info; + if (link.policy == SURF_LINK_FULLDUPLEX) { + char *tmp_link = bprintf("%s_UP", link_id); + info.link_up = + xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); + free(tmp_link); + tmp_link = bprintf("%s_DOWN", link_id); + info.link_down = + xbt_lib_get_or_null(link_lib, tmp_link, SURF_LINK_LEVEL); + free(tmp_link); + } else { + info.link_up = xbt_lib_get_or_null(link_lib, link_id, SURF_LINK_LEVEL); + info.link_down = info.link_up; + } + /** + * Add the link to its appropriate position; + * note that position rankId*(xbt_dynar_length(dimensions)+has_loopback?+has_limiter?) 
+ * holds the link "rankId->rankId" + */ + xbt_dynar_set(current_routing->p_linkUpDownList, rankId*nb_links_per_node + + ((AsClusterPtr)current_routing)->p_has_loopback + + ((AsClusterPtr)current_routing)->p_has_limiter + + j, + &info); + dim_product *= current_dimension; + xbt_free(link_id); + + } + } xbt_free(link_id); xbt_free(host_id); + rankId++; } xbt_dynar_free(&radical_ends); diff --git a/src/surf/surf_routing_cluster.cpp b/src/surf/surf_routing_cluster.cpp index 9670dfe989..93503a1026 100644 --- a/src/surf/surf_routing_cluster.cpp +++ b/src/surf/surf_routing_cluster.cpp @@ -23,6 +23,9 @@ AsCluster::AsCluster() : AsNone() p_backbone = 0; p_loopback = 0; p_router = 0; + p_dimensions = NULL; + p_has_limiter = 0; + p_has_loopback = 0; } /* Business methods */ @@ -32,25 +35,33 @@ void AsCluster::getRouteAndLatency(RoutingEdgePtr src, RoutingEdgePtr dst, sg_pl XBT_VERB("cluster_get_route_and_latency from '%s'[%d] to '%s'[%d]", src->p_name, src->m_id, dst->p_name, dst->m_id); + //retrieve the number of links we stored for each node + int nb_links_per_node = p_has_loopback + p_has_limiter + + (p_dimensions ? 
xbt_dynar_length(p_dimensions) : 1); + if (src->p_rcType != SURF_NETWORK_ELEMENT_ROUTER) { // No specific link for router - info = xbt_dynar_get_as(p_linkUpDownList, src->m_id, s_surf_parsing_link_up_down_t); - if((src->m_id == dst->m_id) && info.loopback_link ){ - xbt_dynar_push_as(route->link_list, void *, info.loopback_link); + if((src->m_id == dst->m_id) && p_has_loopback ){ + info = xbt_dynar_get_as(p_linkUpDownList, src->m_id * nb_links_per_node, s_surf_parsing_link_up_down_t); + xbt_dynar_push_as(route->link_list, void *, info.link_up); if (lat) - *lat += static_cast(info.loopback_link)->getLatency(); + *lat += static_cast(info.link_up)->getLatency(); return; } - if (info.limiter_link) // limiter for sender - xbt_dynar_push_as(route->link_list, void *, info.limiter_link); + if (p_has_limiter){ // limiter for sender + info = xbt_dynar_get_as(p_linkUpDownList, src->m_id * nb_links_per_node + p_has_loopback, s_surf_parsing_link_up_down_t); + xbt_dynar_push_as(route->link_list, void *, info.link_up); + } + info = xbt_dynar_get_as(p_linkUpDownList, src->m_id * nb_links_per_node + p_has_loopback + p_has_limiter, s_surf_parsing_link_up_down_t); if (info.link_up) { // link up xbt_dynar_push_as(route->link_list, void *, info.link_up); if (lat) *lat += static_cast(info.link_up)->getLatency(); } + } if (p_backbone) { @@ -60,17 +71,17 @@ void AsCluster::getRouteAndLatency(RoutingEdgePtr src, RoutingEdgePtr dst, sg_pl } if (dst->p_rcType != SURF_NETWORK_ELEMENT_ROUTER) { // No specific link for router - info = - xbt_dynar_get_as(p_linkUpDownList, dst->m_id, s_surf_parsing_link_up_down_t); + info = xbt_dynar_get_as(p_linkUpDownList, dst->m_id * nb_links_per_node + p_has_loopback + p_has_limiter, s_surf_parsing_link_up_down_t); + if (info.link_down) { // link down xbt_dynar_push_as(route->link_list, void *, info.link_down); if (lat) *lat += static_cast(info.link_down)->getLatency(); } - - if (info.limiter_link) // limiter for receiver - xbt_dynar_push_as(route->link_list, 
void *, info.limiter_link); - + if (p_has_limiter){ // limiter for receiver + info = xbt_dynar_get_as(p_linkUpDownList, dst->m_id * nb_links_per_node + p_has_loopback, s_surf_parsing_link_up_down_t); + xbt_dynar_push_as(route->link_list, void *, info.link_up); + } } } diff --git a/src/surf/surf_routing_cluster.hpp b/src/surf/surf_routing_cluster.hpp index b134050a02..6300c8e8f1 100644 --- a/src/surf/surf_routing_cluster.hpp +++ b/src/surf/surf_routing_cluster.hpp @@ -36,6 +36,10 @@ public: NetworkLinkPtr p_backbone; void *p_loopback; RoutingEdgePtr p_router; + xbt_dynar_t p_dimensions; + int p_has_limiter; + int p_has_loopback; + }; -- 2.20.1