1 /* A thread pool (C++ version). */
3 /* Copyright (c) 2004-2019 The SimGrid Team. All rights reserved. */
5 /* This program is free software; you can redistribute it and/or modify it
6 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include "src/internal_config.h" // HAVE_FUTEX_H
12 #include "src/kernel/context/Context.hpp"
13 #include "xbt/xbt_os_thread.h"
15 #include <boost/optional.hpp>
16 #include <condition_variable>
21 #include <linux/futex.h>
22 #include <sys/syscall.h>
25 XBT_LOG_EXTERNAL_CATEGORY(xbt_parmap);
30 /** @addtogroup XBT_parmap
32 * @brief Parallel map class
35 template <typename T> class Parmap {
37 Parmap(unsigned num_workers, e_xbt_parmap_mode_t mode);
38 Parmap(const Parmap&) = delete;
39 Parmap& operator=(const Parmap&) = delete;
41 void apply(void (*fun)(T), const std::vector<T>& data);
42 boost::optional<T> next();
45 enum Flag { PARMAP_WORK, PARMAP_DESTROY };
48 * @brief Thread data transmission structure
52 ThreadData(Parmap<T>& parmap, int id) : parmap(parmap), worker_id(id) {}
58 * @brief Synchronization object (different specializations).
62 explicit Synchro(Parmap<T>& parmap) : parmap(parmap) {}
63 virtual ~Synchro() = default;
65 * @brief Starts a new round: resets the counter of finished workers and wakes all workers.
67 * This function is called by the controller thread.
69 virtual void master_signal() = 0;
71 * @brief Waits until all workers have finished the current round, then returns.
73 * This function is called by the controller thread.
75 virtual void master_wait() = 0;
77 * @brief Ends a round: wakes the controller thread once all workers have finished.
79 * This function is called by each worker thread (not including the controller) when it has finished a round.
81 virtual void worker_signal() = 0;
83 * @brief Waits for some work to process.
85 * This function is called by each worker thread (not including the controller) when it has no more work to do.
87 * @param round the expected round number
89 virtual void worker_wait(unsigned) = 0;
94 class PosixSynchro : public Synchro {
96 explicit PosixSynchro(Parmap<T>& parmap);
100 void worker_signal();
101 void worker_wait(unsigned round);
104 std::condition_variable ready_cond;
105 std::mutex ready_mutex;
106 std::condition_variable done_cond;
107 std::mutex done_mutex;
111 class FutexSynchro : public Synchro {
113 explicit FutexSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
114 void master_signal();
116 void worker_signal();
117 void worker_wait(unsigned);
120 static void futex_wait(unsigned* uaddr, unsigned val);
121 static void futex_wake(unsigned* uaddr, unsigned val);
125 class BusyWaitSynchro : public Synchro {
127 explicit BusyWaitSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
128 void master_signal();
130 void worker_signal();
131 void worker_wait(unsigned);
134 static void* worker_main(void* arg);
135 Synchro* new_synchro(e_xbt_parmap_mode_t mode);
138 Flag status; /**< is the parmap active or being destroyed? */
139 unsigned work_round; /**< index of the current round */
140 std::vector<std::thread*> workers; /**< worker thread handlers */
141 unsigned num_workers; /**< total number of worker threads including the controller */
142 Synchro* synchro; /**< synchronization object */
144 unsigned thread_counter = 0; /**< number of workers that have done the work */
145 void (*fun)(const T) = nullptr; /**< function to run in parallel on each element of data */
146 const std::vector<T>* data = nullptr; /**< parameters to pass to fun in parallel */
147 std::atomic<unsigned> index; /**< index of the next element of data to pick */
151 * @brief Creates a parallel map object
152 * @param num_workers number of worker threads to create
153 * @param mode how to synchronize the worker threads
155 template <typename T> Parmap<T>::Parmap(unsigned num_workers, e_xbt_parmap_mode_t mode)
157 XBT_CDEBUG(xbt_parmap, "Create new parmap (%u workers)", num_workers);
159 /* Initialize the thread pool data structure */
160 this->status = PARMAP_WORK;
161 this->work_round = 0;
162 this->workers.reserve(num_workers);
163 this->num_workers = num_workers;
164 this->synchro = new_synchro(mode);
166 /* Create the pool of worker threads (the caller of apply() will be worker[0]) */
167 this->workers[0] = nullptr;
168 XBT_ATTRIB_UNUSED unsigned int core_bind = 0;
170 for (unsigned i = 1; i < num_workers; i++) {
171 this->workers[i] = new std::thread(worker_main, new ThreadData(*this, i));
173 /* Bind the worker to a core if possible */
174 #if HAVE_PTHREAD_SETAFFINITY
175 pthread_t pthread = this->workers[i]->native_handle();
178 CPU_SET(core_bind, &cpuset);
179 pthread_setaffinity_np(pthread, sizeof(cpu_set_t), &cpuset);
180 if (core_bind != std::thread::hardware_concurrency() - 1)
189 * @brief Destroys a parmap
// Tears the pool down: flags it as being destroyed, then wakes the workers through master_signal() so that
// their main loop can observe the PARMAP_DESTROY status.
191 template <typename T> Parmap<T>::~Parmap()
193 status = PARMAP_DESTROY;
194 synchro->master_signal();
// Slot 0 stands for the controller itself (no thread object), hence the loop starts at 1.
// NOTE(review): the statements run by and after this loop are not visible in this excerpt.
196 for (unsigned i = 1; i < num_workers; i++)
204 * @brief Applies a list of tasks in parallel.
205 * @param fun the function to call in parallel
206 * @param data each element of this vector will be passed as an argument to fun
// Runs one round from the controller thread: wake the workers, take part in the work, then wait for the round
// to be over.
208 template <typename T> void Parmap<T>::apply(void (*fun)(T), const std::vector<T>& data)
210 /* Assign resources to worker threads (we are maestro here)*/
// NOTE(review): the assignments announced by the comment above are not visible in this excerpt.
214 this->synchro->master_signal(); // maestro wakes all the minions (the working threads), e.g. with futex_wake in futex mode
215 this->work(); // maestro works with its minions
216 this->synchro->master_wait(); // When there is no more work to do, then maestro waits for the last minion to stop
217 XBT_CDEBUG(xbt_parmap, "Job done"); // ... and proceeds
221 * @brief Returns a next task to process.
223 * Worker threads call this function to get more work.
225 * @return the next task to process, or an empty optional if there is no more work
// Claims the next position of data through the shared atomic index, so that each call gets a distinct position.
227 template <typename T> boost::optional<T> Parmap<T>::next()
229 unsigned index = this->index.fetch_add(1, std::memory_order_relaxed);
// Only positions inside data designate a task.
230 if (index < this->data->size())
231 return (*this->data)[index];
// NOTE(review): the branch taken once data is exhausted is not visible in this excerpt.
237 * @brief Main work loop: applies fun to elements in turn.
239 template <typename T> void Parmap<T>::work()
241 unsigned length = this->data->size();
242 unsigned index = this->index.fetch_add(1, std::memory_order_relaxed);
243 while (index < length) {
244 this->fun((*this->data)[index]);
245 index = this->index.fetch_add(1, std::memory_order_relaxed);
250 * Get a synchronization object for given mode.
251 * @param mode the synchronization mode
// Builds the synchronization object matching the requested mode.
253 template <typename T> typename Parmap<T>::Synchro* Parmap<T>::new_synchro(e_xbt_parmap_mode_t mode)
// The default mode is first resolved to a concrete one.
// NOTE(review): presumably futex when available and POSIX otherwise; the guards selecting between the two
// assignments below are not visible in this excerpt.
255 if (mode == XBT_PARMAP_DEFAULT) {
257 mode = XBT_PARMAP_FUTEX;
259 mode = XBT_PARMAP_POSIX;
// One concrete Synchro subclass per mode; each of them keeps a reference to this parmap.
264 case XBT_PARMAP_POSIX:
265 res = new PosixSynchro(*this);
267 case XBT_PARMAP_FUTEX:
269 res = new FutexSynchro(*this);
// NOTE(review): presumably the fallback used when futexes are not compiled in; confirm against the full file.
271 xbt_die("Futex is not available on this OS.");
274 case XBT_PARMAP_BUSY_WAIT:
275 res = new BusyWaitSynchro(*this);
283 /** @brief Main function of a worker thread */
// arg is the ThreadData that the Parmap constructor allocates for this worker; it carries the owning parmap and
// the worker id.
284 template <typename T> void* Parmap<T>::worker_main(void* arg)
286 ThreadData* data = static_cast<ThreadData*>(arg);
287 Parmap<T>& parmap = data->parmap;
// Create a context for this thread and install it as the current one.
289 smx_context_t context = SIMIX_context_new(std::function<void()>(), nullptr, nullptr);
290 kernel::context::Context::set_current(context);
292 XBT_CDEBUG(xbt_parmap, "New worker thread created");
294 /* Worker's main loop */
296 round++; // New scheduling round
// Block until the controller has published the round this worker expects.
297 parmap.synchro->worker_wait(round);
// The destructor also wakes the workers, after setting the status to PARMAP_DESTROY.
298 if (parmap.status == PARMAP_DESTROY)
301 XBT_CDEBUG(xbt_parmap, "Worker %d got a job", data->worker_id);
// Report the end of this worker's round to the controller.
303 parmap.synchro->worker_signal();
304 XBT_CDEBUG(xbt_parmap, "Worker %d has finished", data->worker_id);
306 /* We are destroying the parmap */
// NOTE(review): the cleanup statements and the return are not visible in this excerpt.
// Constructor of the POSIX flavour (condition variables and mutexes); forwards parmap to the Synchro base.
// NOTE(review): the body is not visible in this excerpt.
312 template <typename T> Parmap<T>::PosixSynchro::PosixSynchro(Parmap<T>& parmap) : Synchro(parmap)
// Destructor of the POSIX flavour.
// NOTE(review): the body is not visible in this excerpt.
316 template <typename T> Parmap<T>::PosixSynchro::~PosixSynchro()
320 template <typename T> void Parmap<T>::PosixSynchro::master_signal()
322 std::unique_lock<std::mutex> lk(ready_mutex);
323 this->parmap.thread_counter = 1;
324 this->parmap.work_round++;
325 /* wake all workers */
326 ready_cond.notify_all();
// Controller side: loops until thread_counter has reached num_workers, i.e. until every worker has reported
// through worker_signal().
329 template <typename T> void Parmap<T>::PosixSynchro::master_wait()
// done_mutex is the mutex under which worker_signal() updates thread_counter.
331 std::unique_lock<std::mutex> lk(done_mutex);
332 while (this->parmap.thread_counter < this->parmap.num_workers) {
333 /* wait for all workers to be ready */
// NOTE(review): the statement that actually waits (presumably on done_cond) is not visible in this excerpt.
338 template <typename T> void Parmap<T>::PosixSynchro::worker_signal()
340 std::unique_lock<std::mutex> lk(done_mutex);
341 this->parmap.thread_counter++;
342 if (this->parmap.thread_counter == this->parmap.num_workers) {
343 /* all workers have finished, wake the controller */
344 done_cond.notify_one();
// Worker side: loops until the parmap's work_round equals the round this worker expects.
348 template <typename T> void Parmap<T>::PosixSynchro::worker_wait(unsigned round)
// ready_mutex is the mutex under which master_signal() increments work_round.
350 std::unique_lock<std::mutex> lk(ready_mutex);
351 /* wait for more work */
352 while (this->parmap.work_round != round) {
// NOTE(review): the statement that actually waits (presumably on ready_cond) is not visible in this excerpt.
358 template <typename T> inline void Parmap<T>::FutexSynchro::futex_wait(unsigned* uaddr, unsigned val)
360 XBT_CVERB(xbt_parmap, "Waiting on futex %p", uaddr);
361 syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, val, nullptr, nullptr, 0);
364 template <typename T> inline void Parmap<T>::FutexSynchro::futex_wake(unsigned* uaddr, unsigned val)
366 XBT_CVERB(xbt_parmap, "Waking futex %p", uaddr);
367 syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, val, nullptr, nullptr, 0);
370 template <typename T> void Parmap<T>::FutexSynchro::master_signal()
372 __atomic_store_n(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
373 __atomic_add_fetch(&this->parmap.work_round, 1, __ATOMIC_SEQ_CST);
374 /* wake all workers */
375 futex_wake(&this->parmap.work_round, std::numeric_limits<int>::max());
378 template <typename T> void Parmap<T>::FutexSynchro::master_wait()
380 unsigned count = __atomic_load_n(&this->parmap.thread_counter, __ATOMIC_SEQ_CST);
381 while (count < this->parmap.num_workers) {
382 /* wait for all workers to be ready */
383 futex_wait(&this->parmap.thread_counter, count);
384 count = __atomic_load_n(&this->parmap.thread_counter, __ATOMIC_SEQ_CST);
388 template <typename T> void Parmap<T>::FutexSynchro::worker_signal()
390 unsigned count = __atomic_add_fetch(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
391 if (count == this->parmap.num_workers) {
392 /* all workers have finished, wake the controller */
393 futex_wake(&this->parmap.thread_counter, std::numeric_limits<int>::max());
397 template <typename T> void Parmap<T>::FutexSynchro::worker_wait(unsigned round)
399 unsigned work_round = __atomic_load_n(&this->parmap.work_round, __ATOMIC_SEQ_CST);
400 /* wait for more work */
401 while (work_round != round) {
402 futex_wait(&this->parmap.work_round, work_round);
403 work_round = __atomic_load_n(&this->parmap.work_round, __ATOMIC_SEQ_CST);
408 template <typename T> void Parmap<T>::BusyWaitSynchro::master_signal()
410 __atomic_store_n(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
411 __atomic_add_fetch(&this->parmap.work_round, 1, __ATOMIC_SEQ_CST);
414 template <typename T> void Parmap<T>::BusyWaitSynchro::master_wait()
416 while (__atomic_load_n(&this->parmap.thread_counter, __ATOMIC_SEQ_CST) < this->parmap.num_workers) {
417 std::this_thread::yield();
421 template <typename T> void Parmap<T>::BusyWaitSynchro::worker_signal()
423 __atomic_add_fetch(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
426 template <typename T> void Parmap<T>::BusyWaitSynchro::worker_wait(unsigned round)
428 /* wait for more work */
429 while (__atomic_load_n(&this->parmap.work_round, __ATOMIC_SEQ_CST) != round) {
430 std::this_thread::yield();