From d5e1e19fefb2e6b1c1b7ec559ea8dd3bba91baf2 Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Wed, 29 Aug 2018 01:59:17 +0200 Subject: [PATCH 1/1] Properly kill the context on HostFailureException Before, simix was kinda thinking that the actor was dead, but the context was still running, leading to a Holy Big Mess! --- src/kernel/context/Context.cpp | 5 +++-- src/kernel/context/ContextBoost.cpp | 5 ++++- src/kernel/context/ContextRaw.cpp | 6 +++++- src/kernel/context/ContextThread.cpp | 11 ++++++++--- src/kernel/context/ContextThread.hpp | 8 ++++---- src/kernel/context/ContextUnix.cpp | 8 ++++++-- 6 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/kernel/context/Context.cpp b/src/kernel/context/Context.cpp index ca6e447534..60ad8518a5 100644 --- a/src/kernel/context/Context.cpp +++ b/src/kernel/context/Context.cpp @@ -8,6 +8,8 @@ #include "src/kernel/context/Context.hpp" #include "src/simix/smx_private.hpp" +XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context); + /** * @brief creates a new context for a user level process * @param code a main function @@ -80,8 +82,7 @@ void Context::stop() this->process_->suspended_ = 0; this->iwannadie = false; - if (not this->process_->finished_) // It seems that we sometimes try to destroy an actor twice - simgrid::simix::simcall([this] { SIMIX_process_cleanup(this->process_); }); + simgrid::simix::simcall([this] { SIMIX_process_cleanup(this->process_); }); this->iwannadie = true; } diff --git a/src/kernel/context/ContextBoost.cpp b/src/kernel/context/ContextBoost.cpp index a0fb566231..e5b557f9ed 100644 --- a/src/kernel/context/ContextBoost.cpp +++ b/src/kernel/context/ContextBoost.cpp @@ -5,6 +5,7 @@ #include "ContextBoost.hpp" #include "context_private.hpp" +#include "simgrid/Exception.hpp" #include "src/simix/smx_private.hpp" XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context); @@ -113,10 +114,12 @@ void BoostContext::wrapper(BoostContext::arg_type arg) #endif try { (*context)(); - context->Context::stop(); } catch (StopRequest const&) { XBT_DEBUG("Caught a StopRequest"); + } catch (simgrid::HostFailureException const&) { + XBT_DEBUG("Caught an HostFailureException"); } + context->Context::stop(); ASAN_ONLY(context->asan_stop_ = true); context->suspend(); } diff --git a/src/kernel/context/ContextRaw.cpp b/src/kernel/context/ContextRaw.cpp index f76faf604c..6c3e92af5e 100644 --- a/src/kernel/context/ContextRaw.cpp +++ b/src/kernel/context/ContextRaw.cpp @@ -6,6 +6,7 @@ #include "ContextRaw.hpp" #include "context_private.hpp" #include "mc/mc.h" +#include "simgrid/Exception.hpp" #include "src/simix/smx_private.hpp" XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context); @@ -261,10 +262,13 @@ void RawContext::wrapper(void* arg) ASAN_FINISH_SWITCH(nullptr, &context->asan_ctx_->asan_stack_, &context->asan_ctx_->asan_stack_size_); try { (*context)(); - context->Context::stop(); } catch (StopRequest const&) { XBT_DEBUG("Caught a StopRequest"); + } catch (simgrid::HostFailureException const&) { + XBT_DEBUG("Caught an HostFailureException"); } + context->Context::stop(); + ASAN_ONLY(context->asan_stop_ = true); context->suspend(); } diff --git a/src/kernel/context/ContextThread.cpp b/src/kernel/context/ContextThread.cpp index d18e04cee4..5c2e86d262 100644 --- a/src/kernel/context/ContextThread.cpp +++ b/src/kernel/context/ContextThread.cpp @@ -3,16 +3,17 @@ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ -#include -#include +#include "src/kernel/context/ContextThread.hpp" +#include "simgrid/Exception.hpp" #include "src/internal_config.h" /* loads context system definitions */ #include "src/simix/smx_private.hpp" #include "src/xbt_modinter.h" /* prototype of os thread module's init/exit in XBT */ #include "xbt/function_types.h" #include "xbt/xbt_os_thread.h" -#include "src/kernel/context/ContextThread.hpp" +#include +#include XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context); @@ -120,7 +121,11 @@ void *ThreadContext::wrapper(void *param) } catch (StopRequest const&) { XBT_DEBUG("Caught a StopRequest"); xbt_assert(not context->is_maestro(), "I'm not supposed to be maestro here."); + } catch (simgrid::HostFailureException const&) { + XBT_DEBUG("Caught an HostFailureException"); } + if (not context->is_maestro()) // really? + context->Context::stop(); // Signal to the caller (normally the maestro) that we have finished: context->yield(); diff --git a/src/kernel/context/ContextThread.hpp b/src/kernel/context/ContextThread.hpp index 819e07c7d3..a0b958e8b4 100644 --- a/src/kernel/context/ContextThread.hpp +++ b/src/kernel/context/ContextThread.hpp @@ -1,5 +1,4 @@ -/* Copyright (c) 2009-2018. The SimGrid Team. - * All rights reserved. */ +/* Copyright (c) 2009-2018. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ @@ -9,8 +8,9 @@ #ifndef SIMGRID_SIMIX_THREAD_CONTEXT_HPP #define SIMGRID_SIMIX_THREAD_CONTEXT_HPP -#include - +#include "simgrid/simix.hpp" +#include "src/kernel/context/Context.hpp" +#include "xbt/xbt_os_thread.h" namespace simgrid { namespace kernel { diff --git a/src/kernel/context/ContextUnix.cpp b/src/kernel/context/ContextUnix.cpp index 3e98fc7ab6..d821c3c7eb 100644 --- a/src/kernel/context/ContextUnix.cpp +++ b/src/kernel/context/ContextUnix.cpp @@ -5,13 +5,15 @@ /* \file UContext.cpp Context switching with ucontexts from System V */ -#include "ContextUnix.hpp" #include "context_private.hpp" #include "mc/mc.h" +#include "simgrid/Exception.hpp" #include "src/mc/mc_ignore.hpp" #include "src/simix/ActorImpl.hpp" +#include "ContextUnix.hpp" + XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context); /** Many integers are needed to store a pointer @@ -121,10 +123,12 @@ void UContext::smx_ctx_sysv_wrapper(int i1, int i2) ASAN_FINISH_SWITCH(nullptr, &context->asan_ctx_->asan_stack_, &context->asan_ctx_->asan_stack_size_); try { (*context)(); - context->Context::stop(); } catch (simgrid::kernel::context::Context::StopRequest const&) { XBT_DEBUG("Caught a StopRequest"); + } catch (simgrid::HostFailureException const&) { + XBT_DEBUG("Caught an HostFailureException"); } + context->Context::stop(); ASAN_ONLY(context->asan_stop_ = true); context->suspend(); } -- 2.20.1