From e1927a1b205f67633bcef12a8cf8084858af52ca Mon Sep 17 00:00:00 2001 From: Bruno Donassolo Date: Fri, 18 Mar 2022 10:32:06 +0100 Subject: [PATCH] Remove bmf host model. Add it as an option. Remove --cfg=host/model:ptask_BMF. Use --cfg=host/model:ptask_L07 --cfg=host/solver:bmf instead. More consistent with other models (CPU, disk, etc) --- ChangeLog | 14 +++++----- docs/source/Configuring_SimGrid.rst | 25 ++++++++++++++--- src/kernel/lmm/bmf.cpp | 6 +---- src/surf/ptask_L07.cpp | 27 +++++++------------ src/surf/surf_interface.cpp | 2 -- src/surf/surf_interface.hpp | 8 ------ .../models/ptask-subflows/ptask-subflows.tesh | 24 +++-------------- 7 files changed, 43 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index 70839ce562..c3c960cb44 100644 --- a/ChangeLog +++ b/ChangeLog @@ -45,19 +45,19 @@ New plugin: the Chaos Monkey (killing actors at any time) but some users may find it interesting too. Models: - - New model for parallel task: ptask_BMF. - - More realistic sharing of heterogeneous resources compared to ptask_L07. + - New solver for parallel task: BMF. + - More realistic sharing of heterogeneous resources compared to the fair + bottleneck solver used by ptask_L07. - Implement the BMF (Bottleneck max fairness) fairness. - Improved resource sharing for parallel tasks with sub-flows (parallel communications between same source and destination inside the ptask). - Parameters: - - "--cfg=host/model:ptask_BMF": enable the model. + - "--cfg=host/model:ptask_L07 --cfg=host/solver:bmf": enable the ptask + model with BMF solver. - "--cfg=bmf/max-iterations: ": maximum number of iterations performed by BMF solver (default: 1000). - - "--cfg=bmf/selective-update:": enable/disable the - selective-update optimization. Only invalidates and recomputes modified - parts of inequations system. May speed up simulation if sparse resource - utilization (default: false). 
+ - "--cfg=bmf/precision: ": numerical precision used when computing + resource sharing (default: 1e-12). - This model requires Eigen3 library. Make sure Eigen3 is installed to use BMF. General: diff --git a/docs/source/Configuring_SimGrid.rst b/docs/source/Configuring_SimGrid.rst index fcd52dd697..f8d92428a3 100644 --- a/docs/source/Configuring_SimGrid.rst +++ b/docs/source/Configuring_SimGrid.rst @@ -251,9 +251,6 @@ models for all existing resources. - **ptask_L07:** Host model somehow similar to Cas01+CM02 but allowing "parallel tasks", that are intended to model the moldable tasks of the grid scheduling literature. - - **ptask_BMF:** More realistic model for heterogeneous resource sharing. - Implements BMF (Bottleneck max fairness) fairness. To be used with - parallel tasks instead of ptask_L07. - ``storage/model``: specify the used storage model. Only one model is provided so far. @@ -262,12 +259,32 @@ models for all existing resources. .. todo: make 'compound' the default host model. +.. _options_model_solver: + +Solver +...... + +The different models rely on a linear inequalities solver to share +the underlying resources. SimGrid allows you to change the solver, but +be cautious, **don't change it unless you are 100% sure**. + + - items ``cpu/solver``, ``network/solver``, ``disk/solver`` and ``host/solver`` + allow you to change the solver for each model: + + - **maxmin:** The default solver for all models except ptask. Provides a + max-min fairness allocation. + - **fairbottleneck:** The default solver for ptasks. Extends max-min to + allow heterogeneous resources. + - **bmf:** More realistic solver for heterogeneous resource sharing. + Implements BMF (Bottleneck max fairness) fairness. To be used with + parallel tasks instead of fair-bottleneck. + .. _options_model_optim: Optimization Level .................. 
-The network and CPU models that are based on lmm_solve (that +The network and CPU models that are based on a linear inequalities solver (that is, all our analytical models) accept specific optimization configurations. diff --git a/src/kernel/lmm/bmf.cpp b/src/kernel/lmm/bmf.cpp index a5d7f53b92..e7ce968eb2 100644 --- a/src/kernel/lmm/bmf.cpp +++ b/src/kernel/lmm/bmf.cpp @@ -17,10 +17,6 @@ simgrid::config::Flag cfg_bmf_max_iteration("bmf/max-iterations", "Maximum number of steps to be performed while searching for a BMF allocation", 1000); -simgrid::config::Flag cfg_bmf_selective_update{ - "bmf/selective-update", "Update the constraint set propagating recursively to others constraints (off by default)", - false}; - simgrid::config::Flag cfg_bmf_precision{"bmf/precision", "Numerical precision used when computing resource sharing", 1E-12}; @@ -406,7 +402,7 @@ Eigen::VectorXd BmfSolver::solve() fprintf(stderr, "Unable to find a BMF allocation for your system.\n" "You may try to increase the maximum number of iterations performed by BMF solver " "(\"--cfg=bmf/max-iterations\").\n" - "Additionally, you could decrease numerical precision (\"--cfg=bmf/precision\").\n"); + "Additionally, you could adjust numerical precision (\"--cfg=bmf/precision\").\n"); fprintf(stderr, "Internal states (after %d iterations):\n", it); fprintf(stderr, "A:\n%s\n", debug_eigen(A_).c_str()); fprintf(stderr, "maxA:\n%s\n", debug_eigen(maxA_).c_str()); diff --git a/src/surf/ptask_L07.cpp b/src/surf/ptask_L07.cpp index a8deb27fff..88e2fff9f2 100644 --- a/src/surf/ptask_L07.cpp +++ b/src/surf/ptask_L07.cpp @@ -20,36 +20,29 @@ XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(res_host); XBT_LOG_EXTERNAL_CATEGORY(xbt_cfg); +/*********** + * Options * + ***********/ +static simgrid::config::Flag cfg_ptask_solver("host/solver", + "Set linear equations solver used by ptask model", + "fairbottleneck", + &simgrid::kernel::lmm::System::validate_solver); + /**************************************/ /*** Resource Creation & 
Destruction **/ /**************************************/ void surf_host_model_init_ptask_L07() { XBT_CINFO(xbt_cfg, "Switching to the L07 model to handle parallel tasks."); + xbt_assert(cfg_ptask_solver != "maxmin", "Invalid configuration. Cannot use maxmin solver with parallel tasks."); - auto* system = simgrid::kernel::lmm::System::build("fairbottleneck", true /* selective update */); + auto* system = simgrid::kernel::lmm::System::build(cfg_ptask_solver, true /* selective update */); auto host_model = std::make_shared("Host_Ptask", system); auto* engine = simgrid::kernel::EngineImpl::get_instance(); engine->add_model(host_model); engine->get_netzone_root()->set_host_model(host_model); } -void surf_host_model_init_ptask_BMF() -{ -#if SIMGRID_HAVE_EIGEN3 - XBT_CINFO(xbt_cfg, "Switching to the BMF model to handle parallel tasks."); - - bool select = simgrid::config::get_value("bmf/selective-update"); - auto* system = simgrid::kernel::lmm::System::build("bmf", select); - auto host_model = std::make_shared("Host_Ptask", system); - auto* engine = simgrid::kernel::EngineImpl::get_instance(); - engine->add_model(host_model); - engine->get_netzone_root()->set_host_model(host_model); -#else - xbt_die("Cannot use the BMF ptask model without installing Eigen3."); -#endif -} - namespace simgrid { namespace kernel { namespace resource { diff --git a/src/surf/surf_interface.cpp b/src/surf/surf_interface.cpp index f7be890157..766f547117 100644 --- a/src/surf/surf_interface.cpp +++ b/src/surf/surf_interface.cpp @@ -81,8 +81,6 @@ const std::vector surf_host_model_description = { &surf_host_model_init_compound}, {"ptask_L07", "Host model somehow similar to Cas01+CM02 but allowing parallel tasks", &surf_host_model_init_ptask_L07}, - {"ptask_BMF", "Host model which implements BMF resource allocation and allows parallel tasks", - &surf_host_model_init_ptask_BMF}, }; const std::vector surf_optimization_mode_description = { diff --git a/src/surf/surf_interface.hpp 
b/src/surf/surf_interface.hpp index 95d9acebaf..2cdc8f4210 100644 --- a/src/surf/surf_interface.hpp +++ b/src/surf/surf_interface.hpp @@ -182,14 +182,6 @@ XBT_PUBLIC void surf_host_model_init_current_default(); */ XBT_PUBLIC void surf_host_model_init_ptask_L07(); -/** @ingroup SURF_models - * @brief Initializes the platform with the model BMF - * - * With this model, only parallel tasks can be used. - * Resource sharing is done by calculating a BMF (bottleneck max fairness) allocation - */ -XBT_PUBLIC void surf_host_model_init_ptask_BMF(); - XBT_PUBLIC void surf_disk_model_init_default(); /* -------------------- diff --git a/teshsuite/models/ptask-subflows/ptask-subflows.tesh b/teshsuite/models/ptask-subflows/ptask-subflows.tesh index de5fe7f682..f5e5094f32 100644 --- a/teshsuite/models/ptask-subflows/ptask-subflows.tesh +++ b/teshsuite/models/ptask-subflows/ptask-subflows.tesh @@ -1,7 +1,8 @@ p Test subflows with new BMF model -$ ${bindir:=.}/ptask-subflows --cfg=host/model:ptask_BMF -> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'host/model' to 'ptask_BMF' -> [0.000000] [xbt_cfg/INFO] Switching to the BMF model to handle parallel tasks. +$ ${bindir:=.}/ptask-subflows --cfg=host/model:ptask_L07 --cfg=host/solver:bmf +> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'host/model' to 'ptask_L07' +> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'host/solver' to 'bmf' +> [0.000000] [xbt_cfg/INFO] Switching to the L07 model to handle parallel tasks. > [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] TEST: 1 parallel task with 2 flows > [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] Parallel task sends 1.5B to other host. > [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] Same result for L07 and BMF since the ptask is alone. @@ -30,20 +31,3 @@ $ ${bindir:=.}/ptask-subflows --cfg=host/model:ptask_L07 > [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] With L07: Should be done in 4 seconds: 1s latency and 3 second for transfer. 
> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] With BMF, ptask gets 50% more bandwidth than the noisy flow (because of the sub). > [hostA:ptask:(1) 6.500000] [ptask_subflows_test/INFO] Parallel task finished after 4.000000 seconds - -p Test selective_update enable -$ ${bindir:=.}/ptask-subflows --cfg=host/model:ptask_BMF --cfg=bmf/selective-update:true -> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'host/model' to 'ptask_BMF' -> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'bmf/selective-update' to 'true' -> [0.000000] [xbt_cfg/INFO] Switching to the BMF model to handle parallel tasks. -> [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] TEST: 1 parallel task with 2 flows -> [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] Parallel task sends 1.5B to other host. -> [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] Same result for L07 and BMF since the ptask is alone. -> [hostA:ptask:(1) 0.000000] [ptask_subflows_test/INFO] Should be done in 2.5 seconds: 1s latency and 1.5 second for transfer -> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] Parallel task finished after 2.500000 seconds -> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] TEST: Same parallel task but with a noisy communication at the side -> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] Parallel task sends 1.5B to other host. -> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] With BMF: Should be done in 3.5 seconds: 1s latency and 2 second for transfer. -> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] With L07: Should be done in 4 seconds: 1s latency and 3 second for transfer. -> [hostA:ptask:(1) 2.500000] [ptask_subflows_test/INFO] With BMF, ptask gets 50% more bandwidth than the noisy flow (because of the sub). -> [hostA:ptask:(1) 6.000000] [ptask_subflows_test/INFO] Parallel task finished after 3.500000 seconds -- 2.20.1