1 /* Copyright (c) 2007-2023. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include "src/mc/remote/CheckerSide.hpp"
7 #include "src/mc/explo/Exploration.hpp"
8 #include "src/mc/mc_environ.h"
9 #include "xbt/config.hpp"
10 #include "xbt/system_error.hpp"
13 #include <sys/prctl.h>
16 #include <boost/tokenizer.hpp>
19 #include <sys/ptrace.h>
// Flags passed as third argument to waitpid() in wait_application_process().
// NOTE(review): two alternative definitions are listed here (__WALL and 0); the preprocessor conditional that
// selects one of them is not visible in this excerpt -- confirm against the full file.
23 #define WAITPID_CHECKED_FLAGS __WALL
25 #define WAITPID_CHECKED_FLAGS 0
// Declares the mc_checkerside logging subcategory under the mc category.
// NOTE(review): presumably the XBT_DEBUG, XBT_INFO, XBT_ERROR and XBT_CRITICAL calls of this file log to it as
// their default category -- confirm against the xbt logging documentation.
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_checkerside, mc, "MC communication with the application");
// Configuration flag model-check/setenv: extra environment variables handed to the application, written as
// semicolon-separated NAME=value parts. The third constructor argument is an empty string (presumably the default
// value -- confirm against simgrid::config::Flag).
// The callback only rejects a non-empty value that contains no equal sign at all; the per-part check is done by
// run_child_process() while it parses the value.
// NOTE(review): the closing lines of this initializer are not visible in this excerpt.
30 static simgrid::config::Flag<std::string> _sg_mc_setenv{
31 "model-check/setenv", "Extra environment variables to pass to the child process (ex: 'AZE=aze;QWE=qwe').", "",
32 [](std::string_view value) {
33 xbt_assert(value.empty() || value.find('=', 0) != std::string_view::npos,
34 "The 'model-check/setenv' parameter must be like 'AZE=aze', but it does not contain an equal sign.");
37 namespace simgrid::mc {
// Runs in the forked child: prepares the environment, then replaces the process image with the application to verify.
//
// @param socket descriptor of the child end of the socketpair; exported to the application through the
//               MC_ENV_SOCKET_FD environment variable
// @param args   command line. Entries starting with a dash are stepped over, and the entry reached that way is
//               exec-ed with the rest of the vector as its argv. The code relies on a nullptr entry ending the vector.
//
// Never returns: either execvp() succeeds, or the failure is logged and xbt_die() aborts the process.
// NOTE(review): several lines of this function are not visible in this excerpt (the embedded numbering has gaps),
// among them the declarations of mask and i and the bodies of two loops; the comments only describe what is shown.
39 XBT_ATTRIB_NORETURN static void run_child_process(int socket, const std::vector<char*>& args)
41 /* On startup, simix_global_init() calls simgrid::mc::Client::initialize(), which checks whether the MC_ENV_SOCKET_FD
42 * env variable is set. If so, MC mode is assumed, and the client is setup from its side
46 // Make sure we do not outlive our parent
49 xbt_assert(sigprocmask(SIG_SETMASK, &mask, nullptr) >= 0, "Could not unblock signals");
50 xbt_assert(prctl(PR_SET_PDEATHSIG, SIGHUP) == 0, "Could not PR_SET_PDEATHSIG");
53 setenv(MC_ENV_SOCKET_FD, std::to_string(socket).c_str(), 1);
55 /* Setup the tokenizer that parses the cfg:model-check/setenv parameter */
56 using Tokenizer = boost::tokenizer<boost::char_separator<char>>;
57 boost::char_separator<char> semicol_sep(";");
58 boost::char_separator<char> equal_sep("=");
59 Tokenizer token_vars(_sg_mc_setenv.get(), semicol_sep); /* Iterate over all FOO=foo parts */
60 for (const auto& token : token_vars) {
61 std::vector<std::string> kv;
62 Tokenizer token_kv(token, equal_sep);
63 for (const auto& t : token_kv) /* Iterate over 'FOO' and then 'foo' in that 'FOO=foo' */
// NOTE(review): every equal sign acts as a separator, so a part whose value itself contains one (FOO=a=b) yields
// more than two tokens and trips the assert below -- confirm this limitation is acceptable.
65 xbt_assert(kv.size() == 2, "Parse error on 'model-check/setenv' value %s. Does it contain an equal sign?",
67 XBT_INFO("setenv '%s'='%s'", kv[0].c_str(), kv[1].c_str());
68 setenv(kv[0].c_str(), kv[1].c_str(), 1);
71 /* And now, exec the child process */
// Step over the leading config flags (entries starting with a dash) to find the binary to run.
73 while (args[i] != nullptr && args[i][0] == '-')
76 xbt_assert(args[i] != nullptr,
77 "Unable to find a binary to exec on the command line. Did you only pass config flags?");
79 execvp(args[i], args.data() + i);
// execvp() only returns on failure, so everything below is error reporting.
80 XBT_CRITICAL("The model-checked process failed to exec(%s): %s.\n"
81 " Make sure that your binary exists on disk and is executable.",
82 args[i], strerror(errno));
// An equal sign in the would-be binary name suggests the user tried the shell syntax VAR=value cmd.
83 if (strchr(args[i], '=') != nullptr)
84 XBT_CRITICAL("If you want to pass environment variables to the application, please use --cfg=model-check/setenv:%s",
87 xbt_die("Aborting now.");
// Waits until the freshly started application has stopped itself with SIGSTOP, then resumes it under ptrace.
//
// @param pid process id of the application to wait for
//
// Aborts through xbt_assert if waitpid() does not report pid as stopped by SIGSTOP, or if errno is non-zero after
// the ptrace calls.
// NOTE(review): three alternative resume paths are listed (PTRACE_SETOPTIONS followed by PTRACE_CONT, PT_CONTINUE,
// and an xbt_die for platforms without ptrace support); the preprocessor conditionals selecting among them, and the
// declaration of status, are not visible in this excerpt.
// NOTE(review): the ptrace return values are not checked in the visible lines; the errno test relies on errno
// having been cleared beforehand, which is not shown here -- confirm.
90 static void wait_application_process(pid_t pid)
92 XBT_DEBUG("Waiting for the model-checked process");
95 // The model-checked process SIGSTOP itself to signal it's ready:
96 xbt_assert(waitpid(pid, &status, WAITPID_CHECKED_FLAGS) == pid && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP,
97 "Could not wait model-checked process");
// PTRACE_O_TRACEEXIT makes the application stop right before exiting; handle_dead_child() decodes that event.
101 ptrace(PTRACE_SETOPTIONS, pid, nullptr, PTRACE_O_TRACEEXIT);
102 ptrace(PTRACE_CONT, pid, 0, 0);
104 ptrace(PT_CONTINUE, pid, (caddr_t)1, 0);
106 xbt_die("no ptrace equivalent coded for this platform, stateful model-checking is impossible.");
108 xbt_assert(errno == 0,
109 "Ptrace does not seem to be usable in your setup (errno: %d). "
110 "If you run from within a docker, adding `--cap-add SYS_PTRACE` to the docker line may help. "
111 "If it does not help, please report this bug.",
113 XBT_DEBUG("%d ptrace correctly setup.", getpid());
// Creates the libevent base and registers the event handlers of this checker side.
//
// @param socket_only NOTE(review): the lines testing this flag are not visible in this excerpt. Judging from the two
//                    call sites (true when cloning, false in the main constructor, which comments that it needs a
//                    signal handler too), it presumably skips the SIGCHLD handler -- confirm.
//
// Socket handler: reads messages with MSG_DONTWAIT as long as has_pending_data() is true. A 0-byte read means the
// application closed its socket and is handed to handle_waitpid(); otherwise the bytes go to handle_message(), and
// the event loop is broken when that returns false.
// Signal handler: reacts to SIGCHLD by calling handle_waitpid().
// Any other event kind aborts through xbt_die.
116 void CheckerSide::setup_events(bool socket_only)
118 auto* base = event_base_new();
121 socket_event_ = event_new(
122 base, get_channel().get_socket(), EV_READ | EV_PERSIST,
123 [](evutil_socket_t, short events, void* arg) {
124 auto* checker = static_cast<simgrid::mc::CheckerSide*>(arg);
125 if (events == EV_READ) {
127 std::array<char, MC_MESSAGE_LENGTH> buffer;
128 ssize_t size = checker->get_channel().receive(buffer.data(), buffer.size(), MSG_DONTWAIT);
// NOTE(review): the condition guarding the two error lines below is not visible in this excerpt. The throw would
// propagate through the libevent C frames that invoke this callback -- confirm this is intended.
130 XBT_ERROR("Channel::receive failure: %s", strerror(errno));
132 throw simgrid::xbt::errno_error();
135 if (size == 0) // The app closed the socket. It must be dead by now.
136 checker->handle_waitpid();
137 else if (not checker->handle_message(buffer.data(), size)) {
138 checker->break_loop();
141 } while (checker->get_channel().has_pending_data());
143 xbt_die("Unexpected event");
147 event_add(socket_event_, nullptr);
// Reset first so that the destructor can tell whether a signal event was created.
150 signal_event_ = nullptr;
152 signal_event_ = event_new(
153 base, SIGCHLD, EV_SIGNAL | EV_PERSIST,
154 [](evutil_socket_t sig, short events, void* arg) {
155 auto* checker = static_cast<simgrid::mc::CheckerSide*>(arg);
156 if (events == EV_SIGNAL) {
158 checker->handle_waitpid();
160 xbt_die("Unexpected signal: %d", sig);
162 xbt_die("Unexpected event");
166 event_add(signal_event_, nullptr);
170 /* When this constructor is called, no other checkerside exists */
// Main constructor: starts the application to verify in a child process connected through an AF_UNIX socketpair.
//
// @param args command line of the application, forwarded to run_child_process() in the child
//
// The child gets sockets[0] and never returns from run_child_process(). The parent (the model-checker) keeps
// sockets[1] as its channel and installs both the socket and the signal handlers with setup_events(false).
// NOTE(review): the statement assigning pid_ (a fork, according to the assert message), the remaining socketpair
// arguments and any handling of the unused socket ends are not visible in this excerpt.
171 CheckerSide::CheckerSide(const std::vector<char*>& args) : running_(true)
173 XBT_DEBUG("Create a CheckerSide.");
175 // Create an AF_UNIX socketpair used for exchanging messages between the model-checker process (ancestor)
176 // and the application process (child)
178 xbt_assert(socketpair(AF_UNIX,
180 SOCK_STREAM, /* Mac OSX does not have AF_UNIX + SOCK_SEQPACKET, even if that's faster */
185 "Could not create socketpair: %s", strerror(errno));
188 xbt_assert(pid_ >= 0, "Could not fork application process");
190 if (pid_ == 0) { // Child
192 run_child_process(sockets[0], args);
196 // Parent (model-checker):
198 channel_.reset_socket(sockets[1]);
200 setup_events(false); /* we need a signal handler too */
// Unregisters and frees the libevent events created by setup_events().
// signal_event_ is reset to nullptr by setup_events() before it is possibly created, hence the guard below.
204 CheckerSide::~CheckerSide()
206 event_del(socket_event_);
207 event_free(socket_event_);
208 if (signal_event_ != nullptr) {
209 event_del(signal_event_);
210 event_free(signal_event_);
214 /* This constructor is called when cloning a checkerside to get its application to fork away */
// @param socket        connected socket to the forked application; used, together with the channel of
//                      child_checker, to build the channel of this object
// @param child_checker the checker side that was cloned (clone() passes this); stored as child_checker_ and used by
//                      handle_waitpid() as a proxy to wait for the application
//
// Installs the socket handler only (setup_events(true)), then blocks until a well-formed FORK_REPLY message is
// received; any other outcome aborts through xbt_assert.
// NOTE(review): the lines following the last assert are not visible in this excerpt, so what is done with the
// content of the reply cannot be told from here.
215 CheckerSide::CheckerSide(int socket, CheckerSide* child_checker)
216 : channel_(socket, child_checker->channel_), running_(true), child_checker_(child_checker)
218 setup_events(true); // We already have a signal handled in that case
220 s_mc_message_int_t answer;
221 ssize_t s = get_channel().receive(answer);
222 xbt_assert(s != -1, "Could not receive answer to FORK_REPLY");
223 xbt_assert(s == sizeof answer, "Broken message (size=%zd; expected %zu)", s, sizeof answer);
224 xbt_assert(answer.type == MessageType::FORK_REPLY,
225 "Received unexpected message %s (%i); expected MessageType::FORK_REPLY (%i)", to_c_str(answer.type),
226 (int)answer.type, (int)MessageType::FORK_REPLY);
// Asks the application to fork and returns a CheckerSide attached to the forked copy.
//
// @param master_socket      socket on which the forked application connects back; accept() is called on it
// @param master_socket_name name of that socket, copied into the FORK message; must be exactly MC_SOCKET_NAME_LEN
//                           characters long
// @return a new CheckerSide built from the accepted connection, with this object as its child_checker
//
// Aborts through xbt_assert if the name has the wrong length, if the FORK message cannot be sent or if the
// connection cannot be accepted.
// NOTE(review): accept() returns -1 on failure and any non-negative descriptor on success, so the sock > 0 test
// below would also reject a valid descriptor 0; sock >= 0 looks like the intended check -- confirm.
232 std::unique_ptr<CheckerSide> CheckerSide::clone(int master_socket, const std::string& master_socket_name)
234 s_mc_message_fork_t m = {};
235 m.type = MessageType::FORK;
236 xbt_assert(master_socket_name.size() == MC_SOCKET_NAME_LEN);
237 std::copy_n(begin(master_socket_name), MC_SOCKET_NAME_LEN, begin(m.socket_name));
238 xbt_assert(get_channel().send(m) == 0, "Could not ask the app to fork on need.");
240 int sock = accept(master_socket, nullptr /* I know who's connecting*/, nullptr);
241 xbt_assert(sock > 0, "Cannot accept the incomming connection of the forked app: %s.", strerror(errno));
243 return std::make_unique<CheckerSide>(sock, this);
// Sends a FINALIZE message to the application and blocks until its FINALIZE_REPLY arrives.
//
// @param terminate_asap forwarded to the application as the value field of the FINALIZE message
//
// Aborts through xbt_assert if the message cannot be sent, if the answer cannot be received, if it does not have
// the size of an s_mc_message_t or if it is not of type FINALIZE_REPLY.
246 void CheckerSide::finalize(bool terminate_asap)
248 s_mc_message_int_t m = {};
249 m.type = MessageType::FINALIZE;
250 m.value = terminate_asap;
251 xbt_assert(get_channel().send(m) == 0, "Could not ask the app to finalize on need");
253 s_mc_message_t answer;
254 ssize_t s = get_channel().receive(answer);
255 xbt_assert(s != -1, "Could not receive answer to FINALIZE");
256 xbt_assert(s == sizeof answer, "Broken message (size=%zd; expected %zu)", s, sizeof answer);
257 xbt_assert(answer.type == MessageType::FINALIZE_REPLY,
258 "Received unexpected message %s (%i); expected MessageType::FINALIZE_REPLY (%i)", to_c_str(answer.type),
259 (int)answer.type, (int)MessageType::FINALIZE_REPLY);
// Runs the libevent loop of base_ by calling event_base_dispatch(); the handlers registered by setup_events() are
// invoked from within this call.
262 void CheckerSide::dispatch_events() const
264 event_base_dispatch(base_.get());
// Asks the libevent loop of base_ to stop by calling event_base_loopbreak(). The socket handler of setup_events()
// calls this when handle_message() returns false.
267 void CheckerSide::break_loop() const
269 event_base_loopbreak(base_.get());
// Handles one message received from the application.
//
// @param buffer raw bytes read from the channel; must hold at least one s_mc_message_t
// @param size   number of valid bytes in buffer
// @return NOTE(review): the return statements are not visible in this excerpt. The caller in setup_events() breaks
//         the event loop when this returns false -- presumably that is the WAITING case; confirm.
//
// Visible cases: WAITING, ASSERTION_FAILED (reported through Exploration::report_assertion_failure()) and a fallback
// that aborts with xbt_die. Bytes received beyond the handled message are handed back to the channel with reinject().
// NOTE(review): the declaration of consumed and some case labels are not visible in this excerpt.
// NOTE(review): size is an ssize_t but is printed with %ld in the first assert, while the other asserts of this
// file print ssize_t values with %zd -- confirm and align.
272 bool CheckerSide::handle_message(const char* buffer, ssize_t size)
274 s_mc_message_t base_message;
276 xbt_assert(size >= (ssize_t)sizeof(base_message), "Broken message. Got only %ld bytes.", size);
// Copy the header out of the raw buffer before reading its type field.
277 memcpy(&base_message, buffer, sizeof(base_message));
279 switch (base_message.type) {
281 case MessageType::WAITING:
282 consumed = sizeof(s_mc_message_t);
283 if (size > consumed) {
284 XBT_DEBUG("%d reinject %d bytes after a %s message", getpid(), (int)(size - consumed),
285 to_c_str(base_message.type));
286 channel_.reinject(&buffer[consumed], size - consumed);
291 case MessageType::ASSERTION_FAILED:
292 // report_assertion_failure() is NORETURN, but it may change when we report more than one error per run,
293 // so please keep the consumed computation even if clang-static detects it as a dead assignment.
294 consumed = sizeof(s_mc_message_t);
295 Exploration::get_instance()->report_assertion_failure();
299 xbt_die("Unexpected message from the application");
301 if (size > consumed) {
302 XBT_DEBUG("%d reinject %d bytes after a %s message", getpid(), (int)(size - consumed), to_c_str(base_message.type));
303 channel_.reinject(&buffer[consumed], size - consumed);
// Resumes the application by sending it a CONTINUE message.
//
// Throws xbt::errno_error if the message cannot be sent.
// NOTE(review): the lines following the send are not visible in this excerpt. Judging from the function name, it
// presumably then waits for the next request of the application -- confirm against the full file.
308 void CheckerSide::wait_for_requests()
310 XBT_DEBUG("Resume the application");
311 if (get_channel().send(MessageType::CONTINUE) != 0)
312 throw xbt::errno_error();
// Interprets a wait status reported for the application process.
//
// @param status wait status, either filled in by waitpid() or forwarded in a WAIT_CHILD_REPLY (see handle_waitpid())
//
// - ptrace exit event: the exit status is fetched with PTRACE_GETEVENTMSG, and a crash is reported if it shows that
//   a signal killed the application;
// - stopped by a signal: the application is resumed and that signal is passed on to it;
// - killed by a signal: a crash is reported through Exploration::report_crash();
// - exited: only a debug message is logged in the visible lines.
// NOTE(review): the preprocessor conditionals selecting between the PTRACE_CONT and PT_CONTINUE variants are not
// visible in this excerpt. The errno test after them relies on errno having been cleared beforehand, which is not
// shown here -- confirm.
318 void CheckerSide::handle_dead_child(int status)
320 // From PTRACE_O_TRACEEXIT:
322 if (status >> 8 == (SIGTRAP | (PTRACE_EVENT_EXIT << 8))) {
323 unsigned long eventmsg;
324 xbt_assert(ptrace(PTRACE_GETEVENTMSG, pid_, 0, &eventmsg) != -1, "Could not get exit status");
// From here on, status holds the exit status of the application instead of the ptrace event.
325 status = static_cast<int>(eventmsg);
326 if (WIFSIGNALED(status)) {
328 Exploration::get_instance()->report_crash(status);
333 // We don't care about non-lethal signals, just reinject them:
334 if (WIFSTOPPED(status)) {
335 XBT_DEBUG("Stopped with signal %i", (int)WSTOPSIG(status));
338 ptrace(PTRACE_CONT, pid_, 0, WSTOPSIG(status));
340 ptrace(PT_CONTINUE, pid_, (caddr_t)1, WSTOPSIG(status));
342 xbt_assert(errno == 0, "Could not PTRACE_CONT: %s", strerror(errno));
345 else if (WIFSIGNALED(status)) {
347 Exploration::get_instance()->report_crash(status);
348 } else if (WIFEXITED(status)) {
349 XBT_DEBUG("Child process is over");
// Collects the wait status of the application and passes it to handle_dead_child().
//
// Without a child_checker_, this process waits itself: it polls waitpid(-1, ..., WNOHANG) until nothing more is
// pending, and only reacts to the status of its own application (get_pid()).
// With a child_checker_, it sends a WAIT_CHILD request carrying pid_ over the channel of child_checker_ and
// interprets the value of the WAIT_CHILD_REPLY as the wait status.
// NOTE(review): the declarations of pid and status, the test guarding the errno handling and the loop exit are not
// visible in this excerpt.
// NOTE(review): in the proxy branch the message type is checked before the message size, whereas the other
// receivers of this file check the size first; answer.type may thus be read from a truncated message -- confirm.
354 void CheckerSide::handle_waitpid()
356 XBT_DEBUG("%d checks for wait event. %s", getpid(),
357 child_checker_ == nullptr ? "Wait directly." : "Ask our proxy to wait for its child.");
359 if (child_checker_ == nullptr) { // Wait directly
362 while ((pid = waitpid(-1, &status, WNOHANG)) != 0) {
364 if (errno == ECHILD) { // No more children:
365 xbt_assert(not this->running(), "Inconsistent state");
368 xbt_die("Could not wait for pid: %s", strerror(errno));
// Statuses of other children are ignored here.
372 if (pid == get_pid())
373 handle_dead_child(status);
376 } else { // Ask our proxy to wait for us
377 s_mc_message_int_t request = {};
378 request.type = MessageType::WAIT_CHILD;
379 request.value = pid_;
380 xbt_assert(child_checker_->get_channel().send(request) == 0,
381 "Could not ask my child to waitpid its child for me: %s", strerror(errno));
383 s_mc_message_int_t answer;
384 ssize_t answer_size = child_checker_->get_channel().receive(answer);
385 xbt_assert(answer_size != -1, "Could not receive message");
386 xbt_assert(answer.type == MessageType::WAIT_CHILD_REPLY,
387 "The received message is not the WAIT_CHILD_REPLY I was expecting but of type %s",
388 to_c_str(answer.type));
389 xbt_assert(answer_size == sizeof answer, "Broken message (size=%zd; expected %zu)", answer_size, sizeof answer);
390 handle_dead_child(answer.value);
393 } // namespace simgrid::mc