From d8308c2cacc831e01b98fa8e0d593f1480c25892 Mon Sep 17 00:00:00 2001
From: cristianrosa
Date: Tue, 14 Dec 2010 16:27:28 +0000
Subject: [PATCH] Reimplement parmap using futex based barriers.

The fastest barrier implementation ever ;)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@9239 48e7efb5-ca39-0410-a469-dd3cf9ba447f
---
 src/xbt/parmap.c         | 188 ++++++++++++++++++---------------------
 src/xbt/parmap_private.h |  26 ++++--
 2 files changed, 105 insertions(+), 109 deletions(-)

diff --git a/src/xbt/parmap.c b/src/xbt/parmap.c
index 0034d95f13..b6de91b177 100644
--- a/src/xbt/parmap.c
+++ b/src/xbt/parmap.c
@@ -3,7 +3,10 @@
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-
+#include <unistd.h>      /* syscall() */
+#include <sys/syscall.h> /* SYS_futex */
+#include <linux/futex.h> /* FUTEX_WAIT, FUTEX_WAKE */
+#include <errno.h>
 #include "parmap_private.h"
 
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(xbt_parmap, xbt, "parmap: parallel map");
@@ -11,6 +14,9 @@ XBT_LOG_NEW_SUBCATEGORY(xbt_parmap_unit, xbt_parmap, "parmap unit testing");
 
 static void *_xbt_parmap_worker_main(void *parmap);
 
+static void futex_wait(int *uaddr, int val);
+static void futex_wake(int *uaddr, int val);
+
 xbt_parmap_t xbt_parmap_new(unsigned int num_workers)
 {
   unsigned int i;
@@ -18,18 +24,15 @@ xbt_parmap_t xbt_parmap_new(unsigned int num_workers)
 
   DEBUG1("Create new parmap (%u workers)", num_workers);
 
-  /* Initialize thread pool data structure */
+  /* Initialize the thread pool data structure */
   xbt_parmap_t parmap = xbt_new0(s_xbt_parmap_t, 1);
-  parmap->mutex = xbt_os_mutex_init();
-  parmap->job_posted = xbt_os_cond_init();
-  parmap->all_done = xbt_os_cond_init();
-  parmap->flags = xbt_new0(e_xbt_parmap_flag_t, num_workers + 1);
   parmap->num_workers = num_workers;
-  parmap->num_idle_workers = 0;
-  parmap->workers_max_id = 0;
+  parmap->status = PARMAP_WORK;
 
-  /* Init our flag to wait (for workers' initialization) */
-  parmap->flags[num_workers] = PARMAP_WAIT;
+  parmap->workers_ready = xbt_new0(s_xbt_barrier_t, 1);
+  xbt_barrier_init(parmap->workers_ready, num_workers + 1);
+  parmap->workers_done = xbt_new0(s_xbt_barrier_t, 1);
+  xbt_barrier_init(parmap->workers_done, num_workers + 1);
 
   /* Create the pool of worker threads */
   for(i=0; i < num_workers; i++){
@@ -37,12 +40,6 @@ xbt_parmap_t xbt_parmap_new(unsigned int num_workers)
     xbt_os_thread_detach(worker);
   }
 
-  /* wait for the workers to initialize */
-  xbt_os_mutex_acquire(parmap->mutex);
-  while(parmap->flags[num_workers] == PARMAP_WAIT)
-    xbt_os_cond_wait(parmap->all_done, parmap->mutex);
-  xbt_os_mutex_release(parmap->mutex);
-
   return parmap;
 }
 
@@ -50,62 +47,31 @@
 void xbt_parmap_destroy(xbt_parmap_t parmap)
 {
   DEBUG1("Destroy parmap %p", parmap);
 
-  unsigned int i;
-
-  /* Lock the parmap, then signal every worker an wait for each to finish */
-  xbt_os_mutex_acquire(parmap->mutex);
-  for(i=0; i < parmap->num_workers; i++){
-    parmap->flags[i] = PARMAP_DESTROY;
-  }
+  parmap->status = PARMAP_DESTROY;
 
-  xbt_os_cond_broadcast(parmap->job_posted);
-  while(parmap->num_workers){
-    DEBUG1("Still %u workers, waiting...", parmap->num_workers);
-    xbt_os_cond_wait(parmap->all_done, parmap->mutex);
-  }
+  xbt_barrier_wait(parmap->workers_ready);
+  DEBUG0("Kill job sent");
+  xbt_barrier_wait(parmap->workers_done);
 
-  /* Destroy pool's data structures */
-  xbt_os_cond_destroy(parmap->job_posted);
-  xbt_os_cond_destroy(parmap->all_done);
-  xbt_free(parmap->flags);
-  xbt_os_mutex_release(parmap->mutex);
-  xbt_os_mutex_destroy(parmap->mutex);
+  xbt_free(parmap->workers_ready);
+  xbt_free(parmap->workers_done);
   xbt_free(parmap);
 }
 
-void xbt_parmap_apply(xbt_parmap_t parmap, void_f_pvoid_t fun, xbt_dynar_t data)
+ void xbt_parmap_apply(xbt_parmap_t parmap, void_f_pvoid_t fun, xbt_dynar_t data)
 {
-  unsigned int i;
-  unsigned int myflag_idx = parmap->num_workers;
-
-  /* Assign resources to worker threads */
-  xbt_os_mutex_acquire(parmap->mutex);
+  /* Assign resources to worker threads */
   parmap->fun = fun;
   parmap->data = data;
-  parmap->num_idle_workers = 0;
-
-  /* Set worker flags to work */
-  for(i=0; i < parmap->num_workers; i++){
-    parmap->flags[i] = PARMAP_WORK;
-  }
-
-  /* Set our flag to wait (for the job to be completed)*/
-  parmap->flags[myflag_idx] = PARMAP_WAIT;
 
   /* Notify workers that there is a job */
-  xbt_os_cond_broadcast(parmap->job_posted);
+  xbt_barrier_wait(parmap->workers_ready);
   DEBUG0("Job dispatched, lets wait...");
-
-  /* wait for the workers to finish */
-  while(parmap->flags[myflag_idx] == PARMAP_WAIT)
-    xbt_os_cond_wait(parmap->all_done, parmap->mutex);
+  xbt_barrier_wait(parmap->workers_done);
 
   DEBUG0("Job done");
   parmap->fun = NULL;
   parmap->data = NULL;
-
-  xbt_os_mutex_release(parmap->mutex);
-  return;
 }
 
 static void *_xbt_parmap_worker_main(void *arg)
 {
@@ -114,60 +80,79 @@ static void *_xbt_parmap_worker_main(void *arg)
   xbt_parmap_t parmap = (xbt_parmap_t)arg;
 
   /* Fetch a worker id */
-  xbt_os_mutex_acquire(parmap->mutex);
-  worker_id = parmap->workers_max_id++;
-  xbt_os_mutex_release(parmap->mutex);
+  worker_id = __sync_fetch_and_add(&parmap->workers_max_id, 1);
 
   DEBUG1("New worker thread created (%u)", worker_id);
 
   /* Worker's main loop */
   while(1){
-    xbt_os_mutex_acquire(parmap->mutex);
-    parmap->flags[worker_id] = PARMAP_WAIT;
-    parmap->num_idle_workers++;
-
-    /* If everybody is done set the parmap work flag and signal it */
-    if(parmap->num_idle_workers == parmap->num_workers){
-      DEBUG1("Worker %u: All done, signal the parmap", worker_id);
-      parmap->flags[parmap->num_workers] = PARMAP_WORK;
-      xbt_os_cond_signal(parmap->all_done);
-    }
+    xbt_barrier_wait(parmap->workers_ready);
+
+    if(parmap->status == PARMAP_WORK){
+      DEBUG1("Worker %u got a job", worker_id);
 
-    /* If the wait flag is set then ... wait. */
-    while(parmap->flags[worker_id] == PARMAP_WAIT)
-      xbt_os_cond_wait(parmap->job_posted, parmap->mutex);
+      /* Compute how much data every worker gets */
+      data_size = (xbt_dynar_length(parmap->data) / parmap->num_workers)
+                  + ((xbt_dynar_length(parmap->data) % parmap->num_workers) ? 1 : 0);
 
-    DEBUG1("Worker %u got a job", worker_id);
+      /* Each worker's data segment starts at a position derived from its id */
+      data_start = data_size * worker_id;
 
-    /* If we are shutting down, the last worker is going to signal the
-     * parmap so it can finish destroying the data structure */
-    if(parmap->flags[worker_id] == PARMAP_DESTROY){
+      /* The end of the worker's data segment must be bounded by the end of the data vector */
+      data_end = MIN(data_start + data_size, xbt_dynar_length(parmap->data));
+
+      DEBUG4("Worker %u: data_start=%u data_end=%u (data_size=%u)",
+             worker_id, data_start, data_end, data_size);
+
+      /* Apply the function until the worker reaches the end of its data segment */
+      while(data_start < data_end){
+        parmap->fun(*(void **)xbt_dynar_get_ptr(parmap->data, data_start));
+        data_start++;
+      }
+
+      xbt_barrier_wait(parmap->workers_done);
+
+    /* We are destroying the parmap */
+    }else{
+      xbt_barrier_wait(parmap->workers_done);
       DEBUG1("Shutting down worker %u", worker_id);
-      parmap->num_workers--;
-      if(parmap->num_workers == 0)
-        xbt_os_cond_signal(parmap->all_done);
-      xbt_os_mutex_release(parmap->mutex);
      return NULL;
    }
-    xbt_os_mutex_release(parmap->mutex);
-
-    /* Compute how much data does every worker gets */
-    data_size = (xbt_dynar_length(parmap->data) / parmap->num_workers)
-                + ((xbt_dynar_length(parmap->data) % parmap->num_workers) ? 1 : 0);
+  }
+}
 
-    /* Each worker data segment starts in a position associated with its id*/
-    data_start = data_size * worker_id;
+static void futex_wait(int *uaddr, int val)
+{
+  DEBUG1("Waiting on futex %d", *uaddr);
+  syscall(SYS_futex, uaddr, FUTEX_WAIT, val, NULL, NULL, 0);
+}
 
-    /* The end of the worker data segment must be bounded by the end of the data vector */
-    data_end = MIN(data_start + data_size, xbt_dynar_length(parmap->data));
+static void futex_wake(int *uaddr, int val)
+{
+  DEBUG1("Waking futex %d", *uaddr);
+  syscall(SYS_futex, uaddr, FUTEX_WAKE, val, NULL, NULL, 0);
+}
 
-    DEBUG4("Worker %u: data_start=%u data_end=%u (data_size=%u)", worker_id, data_start, data_end, data_size);
+/* Futex-based implementation of the barrier */
+void xbt_barrier_init(xbt_barrier_t barrier, unsigned int threads_to_wait)
+{
+  barrier->threads_to_wait = threads_to_wait;
+  barrier->thread_count = 0;
+}
 
-    /* While the worker don't pass the end of it data segment apply the function */
-    while(data_start < data_end){
-      parmap->fun(*(void **)xbt_dynar_get_ptr(parmap->data, data_start));
-      data_start++;
-    }
+void xbt_barrier_wait(xbt_barrier_t barrier)
+{
+  int myflag = 0;
+  unsigned int mycount = 0;
+
+  myflag = barrier->futex;
+  mycount = __sync_add_and_fetch(&barrier->thread_count, 1);
+  if(mycount < barrier->threads_to_wait){
+    futex_wait(&barrier->futex, myflag);
+  }else{
+    barrier->futex = __sync_add_and_fetch(&barrier->futex, 1);
+    barrier->thread_count = 0;
+    futex_wake(&barrier->futex, barrier->threads_to_wait);
   }
 }
 
@@ -186,24 +171,25 @@ void fun(void *arg);
 
 void fun(void *arg)
 {
-  INFO1("I'm job %lu", (unsigned long)arg);
+  //INFO1("I'm job %lu", (unsigned long)arg);
 }
 
 XBT_TEST_UNIT("basic", test_parmap_basic, "Basic usage")
 {
   xbt_test_add0("Create the parmap");
 
-  unsigned long j;
+  unsigned long i,j;
   xbt_dynar_t data = xbt_dynar_new(sizeof(void *), NULL);
 
   /* Create the parallel map */
-  parmap = xbt_parmap_new(5);
+  parmap = xbt_parmap_new(10);
 
-  for(j=0; j < 200; j++){
+  for(j=0; j < 100; j++){
     xbt_dynar_push_as(data, void *, (void *)j);
   }
 
-  xbt_parmap_apply(parmap, fun, data);
+  for(i=0; i < 5; i++)
+    xbt_parmap_apply(parmap, fun, data);
 
   /* Destroy the parmap */
   xbt_parmap_destroy(parmap);
diff --git a/src/xbt/parmap_private.h b/src/xbt/parmap_private.h
index 283b025774..02da424924 100644
--- a/src/xbt/parmap_private.h
+++ b/src/xbt/parmap_private.h
@@ -13,22 +13,32 @@
 #include "xbt/dynar.h"
 #include "xbt/log.h"
 
+
 typedef enum{
-  PARMAP_WAIT = 0,
-  PARMAP_WORK,
-  PARMAP_DESTROY,
+  PARMAP_WORK = 0,
+  PARMAP_DESTROY
 } e_xbt_parmap_flag_t;
 
+typedef struct s_xbt_barrier{
+  int futex;
+  unsigned int thread_count;
+  unsigned int threads_to_wait;
+} s_xbt_barrier_t, *xbt_barrier_t;
+
+/* Wait until threads_to_wait threads have arrived at the barrier */
+void xbt_barrier_init(xbt_barrier_t barrier, unsigned int threads_to_wait);
+void xbt_barrier_wait(xbt_barrier_t barrier);
+
+
 typedef struct s_xbt_parmap {
-  xbt_os_mutex_t mutex;        /* pool's mutex */
-  xbt_os_cond_t job_posted;    /* job is posted */
-  xbt_os_cond_t all_done;      /* job is done */
-  e_xbt_parmap_flag_t *flags;  /* Per thread flag + lastone for the parmap */
+  e_xbt_parmap_flag_t status;
+  xbt_barrier_t workers_ready;
+  xbt_barrier_t workers_done;
   unsigned int num_workers;
-  unsigned int num_idle_workers;
   unsigned int workers_max_id;
   void_f_pvoid_t fun;
   xbt_dynar_t data;
 } s_xbt_parmap_t;
 
+
 #endif
-- 
2.20.1
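
Note on the control flow in xbt_parmap_apply: every job costs exactly two
barrier crossings. The caller and the num_workers workers first meet at
workers_ready (which is why both barriers are initialized for num_workers + 1
threads); crossing it publishes parmap->fun, parmap->data and parmap->status
to the workers. Caller and workers then meet again at workers_done, which
tells the caller that every chunk has been processed. xbt_parmap_destroy
reuses the same protocol, sending a kill job with status set to
PARMAP_DESTROY.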
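
A worked example of the chunking arithmetic in _xbt_parmap_worker_main:
data_size is the ceiling of xbt_dynar_length(data) / num_workers. With the
unit test's 100 items and 10 workers, data_size = 10 and worker k handles
items 10k through 10k+9. With 7 workers it would be ceil(100/7) = 15: workers
0 to 5 take items 0 through 89, worker 6 is clipped by MIN() to items 90
through 99, and any worker whose data_start lands past the end of the dynar
simply runs an empty loop.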
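
The barrier can also be exercised outside of SimGrid. Below is a minimal
standalone sketch of the same futex technique; it is an illustration under
stated assumptions, not code from the patch: it is Linux-only, relies on
GCC's __sync builtins and glibc's syscall() wrapper, and the names barrier_t,
barrier_init and barrier_wait are invented for the example. The futex word
doubles as a generation counter: a waiter snapshots it before announcing its
arrival, and FUTEX_WAIT re-checks the word against that snapshot atomically
in the kernel, so a wake-up landing between the snapshot and the sleep is not
lost; the call just returns immediately. Unlike the patch, the sketch loops
around FUTEX_WAIT (the syscall can also return early on a signal), resets
thread_count before bumping the generation so that an early-returning waiter
cannot be counted against the previous round, and drops the patch's redundant
plain write of the __sync_add_and_fetch result back into the futex word.

/* barrier_sketch.c: compile with gcc -O2 -pthread barrier_sketch.c */
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <linux/futex.h>

typedef struct {
  int futex;                    /* generation counter, bumped once per round */
  unsigned int thread_count;    /* threads that have arrived in this round */
  unsigned int threads_to_wait; /* barrier size */
} barrier_t;

static void barrier_init(barrier_t *b, unsigned int n)
{
  b->futex = 0;
  b->thread_count = 0;
  b->threads_to_wait = n;
}

static void barrier_wait(barrier_t *b)
{
  /* Snapshot the generation BEFORE announcing our arrival: FUTEX_WAIT only
   * sleeps if the word still equals the snapshot, so a concurrent wake-up
   * cannot be lost. */
  int gen = b->futex;

  if (__sync_add_and_fetch(&b->thread_count, 1) < b->threads_to_wait) {
    /* Loop because FUTEX_WAIT may return early (EWOULDBLOCK or EINTR). */
    while (*(volatile int *)&b->futex == gen)
      syscall(SYS_futex, &b->futex, FUTEX_WAIT, gen, NULL, NULL, 0);
  } else {
    /* Last arrival: reset the count for the next round, then bump the
     * generation (the __sync builtin is a full memory barrier) and wake
     * every waiter. */
    b->thread_count = 0;
    __sync_add_and_fetch(&b->futex, 1);
    syscall(SYS_futex, &b->futex, FUTEX_WAKE, b->threads_to_wait,
            NULL, NULL, 0);
  }
}

/* Tiny usage example: four threads meet at the barrier once. */
static barrier_t barrier;

static void *worker(void *arg)
{
  printf("thread %ld before the barrier\n", (long)arg);
  barrier_wait(&barrier);
  printf("thread %ld after the barrier\n", (long)arg);
  return NULL;
}

int main(void)
{
  pthread_t threads[4];
  long i;

  barrier_init(&barrier, 4);
  for (i = 0; i < 4; i++)
    pthread_create(&threads[i], NULL, worker, (void *)i);
  for (i = 0; i < 4; i++)
    pthread_join(threads[i], NULL);
  return 0;
}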