From: pini Date: Mon, 14 Jun 2010 14:22:20 +0000 (+0000) Subject: Added new model (default with smpirun) that integrates the three-interval linear... X-Git-Tag: v3_5~954 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/b1d63bb637a56b3d5ade0d5c07856d90025ae517 Added new model (default with smpirun) that integrates the three-interval linear regression for correction factors. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@7854 48e7efb5-ca39-0410-a469-dd3cf9ba447f --- diff --git a/src/include/surf/surf.h b/src/include/surf/surf.h index 3de212bc1b..24bbcdf954 100644 --- a/src/include/surf/surf.h +++ b/src/include/surf/surf.h @@ -340,6 +340,18 @@ XBT_PUBLIC(void) create_workstations(void); */ XBT_PUBLIC_DATA(surf_model_t) surf_network_model; +/** \brief Same as network model 'LegrandVelho', only with different correction factors. + * \ingroup SURF_models + * \param filename XML platform file name + * + * This model is proposed by Pierre-Nicolas Clauss, Martin Quinson and Stéphane Génaud + * based on the model 'LV08' and different correction factors depending on the communication + * size (< 1KiB, < 64KiB, >= 64KiB). 
+ * + * \see surf_workstation_model_init_SMPI() + */ +XBT_PUBLIC(void) surf_network_model_init_SMPI(const char *filename); + /** \brief Initializes the platform with the network model 'LagrangeVelho' * \ingroup SURF_models * \param filename XML platform file name diff --git a/src/smpi/private.h b/src/smpi/private.h index 1265260410..483ba79b9d 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -32,7 +32,6 @@ typedef struct s_smpi_mpi_request { int complete; MPI_Request match; unsigned flags; - MPI_Request ack; } s_smpi_mpi_request_t; void smpi_process_init(int* argc, char*** argv); diff --git a/src/smpi/smpi_base.c b/src/smpi/smpi_base.c index 2e192a05ae..8b5c714b58 100644 --- a/src/smpi/smpi_base.c +++ b/src/smpi/smpi_base.c @@ -19,9 +19,6 @@ XBT_LOG_EXTERNAL_CATEGORY(smpi_receiver); XBT_LOG_EXTERNAL_CATEGORY(smpi_sender); XBT_LOG_EXTERNAL_CATEGORY(smpi_util); -#define EAGER_LIMIT 65536 -#define RDV_TAG (-10) - void smpi_process_init(int* argc, char*** argv) { int index; smpi_process_data_t data; @@ -61,23 +58,6 @@ static MPI_Request build_request(void* buf, int count, MPI_Datatype datatype, in request->complete = 0; request->match = MPI_REQUEST_NULL; request->flags = flags; - if(request->size < EAGER_LIMIT) { - request->ack = MPI_REQUEST_NULL; - } else { - request->ack = xbt_new(s_smpi_mpi_request_t, 1); - request->ack->buf = NULL; - request->ack->size = 0; - request->ack->src = dst; - request->ack->dst = src; - request->ack->tag = RDV_TAG; - request->ack->comm = comm; - request->ack->rdv = NULL; - request->ack->pair = NULL; - request->ack->complete = 0; - request->ack->match = MPI_REQUEST_NULL; - request->ack->flags = NON_PERSISTENT | ((request->flags & RECV) == RECV ? 
SEND : RECV); - smpi_mpi_start(request->ack); - } return request; } @@ -96,10 +76,6 @@ MPI_Request smpi_mpi_recv_init(void* buf, int count, MPI_Datatype datatype, int void smpi_mpi_start(MPI_Request request) { xbt_assert0(request->complete == 0, "Cannot start a non-finished communication"); - if(request->size >= EAGER_LIMIT) { - print_request("RDV ack", request->ack); - smpi_mpi_wait(&request->ack, MPI_STATUS_IGNORE); - } if((request->flags & RECV) == RECV) { smpi_process_post_recv(request); print_request("New recv", request); diff --git a/src/smpi/smpirun.in b/src/smpi/smpirun.in index 8966eb9d2f..85059a9624 100755 --- a/src/smpi/smpirun.in +++ b/src/smpi/smpirun.in @@ -13,6 +13,8 @@ NETWORK_LATENCY="${DEFAULT_NETWORK_LATENCY}" NUMPROCS="${DEFAULT_NUMPROCS}" POWER="${DEFAULT_POWER}" +SIMOPTS="--cfg=network/model:SMPI --cfg=TCP_gamma:4194304" + while true; do case "$1" in "-np" | "-n") diff --git a/src/surf/network.c b/src/surf/network.c index 61952158a8..86ec1833d7 100644 --- a/src/surf/network.c +++ b/src/surf/network.c @@ -22,6 +22,70 @@ double sg_weight_S_parameter = 0.0;/* default value; can be set by model or from double sg_tcp_gamma = 0.0; +/******************************************************************************/ +/* Factors callbacks */ +/******************************************************************************/ +static double constant_latency_factor(double size) +{ + return sg_latency_factor; +} + +static double constant_bandwidth_factor(double size) +{ + return sg_bandwidth_factor; +} + +static double constant_bandwidth_constraint(double rate, double bound, double size) +{ + return rate; +} + +/**********************/ +/* SMPI callbacks */ +/**********************/ +static double smpi_latency_factor(double size) +{ + /* 1 B <= size <= 1 KiB */ + if (size <= 1024.0) { + return 1.0056; + } + + /* 2 KiB <= size <= 32 KiB */ + if (size <= 32768.0) { + return 1.8805; + } + + /* 64 KiB <= size <= 4 MiB */ + return 22.7111; +} + +static double 
smpi_bandwidth_factor(double size) +{ + /* 1 B <= size <= 1 KiB */ + if (size <= 1024.0) { + return 0.2758; + } + + /* 2 KiB <= size <= 32 KiB */ + if (size <= 32768.0) { + return 0.5477; + } + + /* 64 KiB <= size <= 4 MiB */ + return 0.9359; +} + +static double smpi_bandwidth_constraint(double rate, double bound, double size) +{ + return rate < 0 ? bound : min(bound, rate * smpi_bandwidth_factor(size)); +} + + +static double (*latency_factor_callback)(double) = &constant_latency_factor; +static double (*bandwidth_factor_callback)(double) = &constant_bandwidth_factor; +static double (*bandwidth_constraint_callback)(double, double, double) = &constant_bandwidth_constraint; + + static link_CM02_t net_link_new(char *name, double bw_initial, tmgr_trace_t bw_trace, @@ -385,6 +449,7 @@ static surf_action_t net_communicate(const char *src_name, const char *dst_name, link_CM02_t link; int failed = 0; surf_action_network_CM02_t action = NULL; + double bandwidth_bound; /* LARGE PLATFORMS HACK: Add a link_CM02_t *link and a int link_nb to network_card_CM02_t. 
It will represent local links for this node Use the cluster_id for ->id */ @@ -414,17 +479,23 @@ static surf_action_t net_communicate(const char *src_name, const char *dst_name, action->latency = 0.0; action->weight = 0.0; + bandwidth_bound = -1.0; xbt_dynar_foreach(route, i, link) { action->latency += link->lat_current; action->weight += link->lat_current + sg_weight_S_parameter / (link->lmm_resource.power.peak * link->lmm_resource.power.scale); + if(bandwidth_bound < 0.0) + bandwidth_bound = (*bandwidth_factor_callback)(size) * (link->lmm_resource.power.peak * link->lmm_resource.power.scale); + else + bandwidth_bound = min(bandwidth_bound, (*bandwidth_factor_callback)(size) * (link->lmm_resource.power.peak * link->lmm_resource.power.scale)); } /* LARGE PLATFORMS HACK: Add src->link and dst->link latencies */ action->lat_current = action->latency; - action->latency *= sg_latency_factor; + action->latency *= (*latency_factor_callback)(size); + action->rate = (*bandwidth_constraint_callback)(action->rate, bandwidth_bound, size); /* LARGE PLATFORMS HACK: lmm_variable_new(..., total_route_size) */ @@ -565,6 +636,28 @@ static void surf_network_model_init_internal(void) NULL)); } +/************************************************************************/ +/* New model based on LV08 and experimental results of MPI ping-pongs */ +/************************************************************************/ +void surf_network_model_init_SMPI(const char *filename) +{ + + if (surf_network_model) + return; + surf_network_model_init_internal(); + latency_factor_callback = &smpi_latency_factor; + bandwidth_factor_callback = &smpi_bandwidth_factor; + bandwidth_constraint_callback = &smpi_bandwidth_constraint; + net_define_callbacks(filename); + xbt_dynar_push(model_list, &surf_network_model); + network_solve = lmm_solve; + + xbt_cfg_setdefault_double(_surf_cfg_set,"network/weight_S", 8775); + + update_model_description(surf_network_model_description, + "SMPI", surf_network_model); 
+} + /************************************************************************/ /* New model based on optimizations discussed during this thesis */ /************************************************************************/ diff --git a/src/surf/surf.c b/src/surf/surf.c index 5b31c8b398..a670cc14b7 100644 --- a/src/surf/surf.c +++ b/src/surf/surf.c @@ -116,6 +116,7 @@ s_surf_model_description_t surf_network_model_description[] = { {"Vivaldi", "Scalable network model using the Vivaldi coordinate ideas", NULL, surf_network_model_init_Vivaldi}, {"CM02", "Realistic network model with lmm_solve and no correction factors", NULL, surf_network_model_init_CM02}, {"LV08", "Realistic network model with lmm_solve and these correction factors: latency*=10.4, bandwidth*=.92, S=8775" , NULL, surf_network_model_init_LegrandVelho}, + {"SMPI", "Realistic network model with lmm_solve and correction factors on three intervals (< 1KiB, < 64 KiB, >= 64 KiB)", NULL, surf_network_model_init_SMPI}, #ifdef HAVE_GTNETS {"GTNets", "Network Pseudo-model using the GTNets simulator instead of an analytic model", NULL, surf_network_model_init_GTNETS}, #endif diff --git a/src/surf/surf_config.c b/src/surf/surf_config.c index 7f5e6fb3bb..bda88cd736 100644 --- a/src/surf/surf_config.c +++ b/src/surf/surf_config.c @@ -311,7 +311,7 @@ void surf_config_models_setup(const char *platform_file) network_model_name = xbt_cfg_get_string(_surf_cfg_set, "network/model"); cpu_model_name = xbt_cfg_get_string(_surf_cfg_set, "cpu/model"); - if ((strcmp(network_model_name,"LV08") || strcmp(cpu_model_name,"Cas01")) + if ((strcmp(network_model_name,"LV08") || strcmp(network_model_name,"SMPI") || strcmp(cpu_model_name,"Cas01")) && !strcmp(workstation_model_name, "CLM03")){ const char *val = "compound"; INFO0("Switching workstation model to compound since you changed the network and/or cpu model(s)");