Augustin Degomme <adegomme@gmail.com> <degomme@idpann>
Augustin Degomme <adegomme@gmail.com> <degomme@idpann.imag.fr>
Augustin Degomme <adegomme@gmail.com> <degomme@localhost.localdomain>
+Augustin Degomme <adegomme@gmail.com> <degomme@pilipili2.imag.fr>
Augustin Degomme <adegomme@gmail.com> <degomme@wasabi>
Jean-Emile Dartois <jean-emile.dartois@b-com.com>
Jean-Emile Dartois <jean-emile.dartois@b-com.com> <jedartois@gmail.com>
Marion Guthmuller <marion.guthmuller@inria.fr> <marion.guthmuller@loria.fr>
Ahmed Harbaoui <amad206@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Christian Heinrich <franz-christian.heinrich@inria.fr> <christian.heinrich@livando.com>
+Christian Heinrich <franz-christian.heinrich@inria.fr> <christian@gladbachcity.de>
Jean-Baptiste Hervé <jean-baptiste.herve@esial.net>
Takahiro Hirofuchi <t.hirofuchi+sg@aist.go.jp> <thirofuchi@debian.(none)>
Sascha Hunold <sascha@uni-hd.de> <sahu@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
AMPI_Iteration_in(MPI_COMM_WORLD);
simgrid::s4u::this_actor::sleep_for(rank);
AMPI_Iteration_out(MPI_COMM_WORLD);
- if (rank == 0)
+ if (rank == 0) {
free(pointer);
+ pointer = nullptr;
+ }
AMPI_Migrate(MPI_COMM_WORLD);
if (rank != 0)
- free(pointer);
+ free(pointer);
MPI_Finalize();
return 0;
void worker_wait(unsigned);
private:
- static void futex_wait(unsigned* uaddr, unsigned val);
- static void futex_wake(unsigned* uaddr, unsigned val);
+ static void futex_wait(std::atomic_uint* uaddr, unsigned val);
+ static void futex_wake(std::atomic_uint* uaddr, unsigned val);
};
#endif
void work();
Flag status; /**< is the parmap active or being destroyed? */
- unsigned work_round; /**< index of the current round */
+ std::atomic_uint work_round; /**< index of the current round */
std::vector<std::thread*> workers; /**< worker thread handlers */
unsigned num_workers; /**< total number of worker threads including the controller */
Synchro* synchro; /**< synchronization object */
- unsigned thread_counter = 0; /**< number of workers that have done the work */
+ std::atomic_uint thread_counter{0}; /**< number of workers that have done the work */
void (*fun)(const T) = nullptr; /**< function to run in parallel on each element of data */
const std::vector<T>* data = nullptr; /**< parameters to pass to fun in parallel */
- std::atomic<unsigned> index; /**< index of the next element of data to pick */
+ std::atomic_uint index; /**< index of the next element of data to pick */
};
/**
}
#if HAVE_FUTEX_H
-template <typename T> inline void Parmap<T>::FutexSynchro::futex_wait(unsigned* uaddr, unsigned val)
+template <typename T> inline void Parmap<T>::FutexSynchro::futex_wait(std::atomic_uint* uaddr, unsigned val)
{
XBT_CVERB(xbt_parmap, "Waiting on futex %p", uaddr);
syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, val, nullptr, nullptr, 0);
}
-template <typename T> inline void Parmap<T>::FutexSynchro::futex_wake(unsigned* uaddr, unsigned val)
+template <typename T> inline void Parmap<T>::FutexSynchro::futex_wake(std::atomic_uint* uaddr, unsigned val)
{
XBT_CVERB(xbt_parmap, "Waking futex %p", uaddr);
syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, val, nullptr, nullptr, 0);
template <typename T> void Parmap<T>::FutexSynchro::master_signal()
{
- __atomic_store_n(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
- __atomic_add_fetch(&this->parmap.work_round, 1, __ATOMIC_SEQ_CST);
+ this->parmap.thread_counter.store(1);
+ this->parmap.work_round.fetch_add(1);
/* wake all workers */
futex_wake(&this->parmap.work_round, std::numeric_limits<int>::max());
}
template <typename T> void Parmap<T>::FutexSynchro::master_wait()
{
- unsigned count = __atomic_load_n(&this->parmap.thread_counter, __ATOMIC_SEQ_CST);
+ unsigned count = this->parmap.thread_counter.load();
while (count < this->parmap.num_workers) {
/* wait for all workers to be ready */
futex_wait(&this->parmap.thread_counter, count);
- count = __atomic_load_n(&this->parmap.thread_counter, __ATOMIC_SEQ_CST);
+ count = this->parmap.thread_counter.load();
}
}
template <typename T> void Parmap<T>::FutexSynchro::worker_signal()
{
- unsigned count = __atomic_add_fetch(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
+ unsigned count = this->parmap.thread_counter.fetch_add(1) + 1;
if (count == this->parmap.num_workers) {
/* all workers have finished, wake the controller */
futex_wake(&this->parmap.thread_counter, std::numeric_limits<int>::max());
template <typename T> void Parmap<T>::FutexSynchro::worker_wait(unsigned round)
{
- unsigned work_round = __atomic_load_n(&this->parmap.work_round, __ATOMIC_SEQ_CST);
+ unsigned work_round = this->parmap.work_round.load();
/* wait for more work */
while (work_round != round) {
futex_wait(&this->parmap.work_round, work_round);
- work_round = __atomic_load_n(&this->parmap.work_round, __ATOMIC_SEQ_CST);
+ work_round = this->parmap.work_round.load();
}
}
#endif
template <typename T> void Parmap<T>::BusyWaitSynchro::master_signal()
{
- __atomic_store_n(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
- __atomic_add_fetch(&this->parmap.work_round, 1, __ATOMIC_SEQ_CST);
+ this->parmap.thread_counter.store(1);
+ this->parmap.work_round.fetch_add(1);
}
template <typename T> void Parmap<T>::BusyWaitSynchro::master_wait()
{
- while (__atomic_load_n(&this->parmap.thread_counter, __ATOMIC_SEQ_CST) < this->parmap.num_workers) {
+ while (this->parmap.thread_counter.load() < this->parmap.num_workers) {
std::this_thread::yield();
}
}
template <typename T> void Parmap<T>::BusyWaitSynchro::worker_signal()
{
- __atomic_add_fetch(&this->parmap.thread_counter, 1, __ATOMIC_SEQ_CST);
+ this->parmap.thread_counter.fetch_add(1);
}
template <typename T> void Parmap<T>::BusyWaitSynchro::worker_wait(unsigned round)
{
/* wait for more work */
- while (__atomic_load_n(&this->parmap.work_round, __ATOMIC_SEQ_CST) != round) {
+ while (this->parmap.work_round.load() != round) {
std::this_thread::yield();
}
}
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
-#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file
-#include "src/include/catch.hpp"
+#include "catch.hpp"
#include "simgrid/kernel/resource/Resource.hpp"
#include "src/kernel/resource/profile/trace_mgr.hpp"
REQUIRE(it == insertedIt); // Check that we find what we've put
if (value >= 0) {
- resource->apply_event(it, value);
res.push_back(simgrid::kernel::profile::DatedValue(thedate, value));
} else {
XBT_DEBUG("%.1f: ignore an event (idx: %u)\n", thedate, it->idx);
}
+ resource->apply_event(it, value);
}
tmgr_finalize();
return res;
datatype->extent(&true_lb, &true_extent);
//MPI_Op *op_ptr;
- //is_commutative = op->is_commutative();
+ is_commutative = op->is_commutative();
{
int range = 0, range_threshold = 0, range_threshold_intra = 0;
int range = 0;
int range_threshold = 0;
int range_threshold_intra = 0;
- int is_homogeneous, is_contig;
+ int is_homogeneous;
+//, is_contig;
MPI_Aint type_size;
//, position;
void *tmp_buf = NULL;
comm_size = comm->size();
//rank = comm->rank();
- is_contig=1;
+ //is_contig=1;
/* if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN)*/
/* is_contig = 1;*/
/* else {*/
mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold];
#endif
if (two_level_bcast == 1) {
- if (not is_contig || not is_homogeneous) {
+ if (not is_homogeneous) {
+ //if (not is_contig || not is_homogeneous) {
tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes);
/* position = 0;*/
#ifdef CHANNEL_MRAIL_GEN2
if ((mv2_enable_zcpy_bcast == 1) &&
(&MPIR_Pipelined_Bcast_Zcpy_MV2 == MV2_Bcast_function)) {
- if (not is_contig || not is_homogeneous) {
+ if (not is_homogeneous) {
+ //if (not is_contig || not is_homogeneous) {
mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
} else {
mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(buffer, count, datatype,
#endif /* defined(CHANNEL_MRAIL_GEN2) */
{
shmem_comm = comm->get_intra_comm();
- if (not is_contig || not is_homogeneous) {
+ if (not is_homogeneous) {
+ //if (not is_contig || not is_homogeneous) {
MPIR_Bcast_tune_inter_node_helper_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
} else {
MPIR_Bcast_tune_inter_node_helper_MV2(buffer, count, datatype, root, comm);
root = INTRA_NODE_ROOT;
- if (not is_contig || not is_homogeneous) {
+ //if (not is_contig || not is_homogeneous) {
+ if (not is_homogeneous) {
mpi_errno = MV2_Bcast_intra_node_function(tmp_buf, nbytes, MPI_BYTE, root, shmem_comm);
} else {
mpi_errno = MV2_Bcast_intra_node_function(buffer, count,
* See https://www.akkadia.org/drepper/dsohowto.pdf
* and https://lists.freebsd.org/pipermail/freebsd-current/2016-March/060284.html
*/
-#if !defined(RTLD_DEEPBIND) || HAVE_SANITIZER_ADDRESS || HAVE_SANITIZER_THREAD
+#if !RTLD_DEEPBIND || HAVE_SANITIZER_ADDRESS || HAVE_SANITIZER_THREAD
#define WANT_RTLD_DEEPBIND 0
#else
#define WANT_RTLD_DEEPBIND RTLD_DEEPBIND
xbt_assert(fdout >= 0, "Cannot write into %s", target.c_str());
XBT_DEBUG("Copy %" PRIdMAX " bytes into %s", static_cast<intmax_t>(fdin_size), target.c_str());
- bool slow_copy = true;
#if SG_HAVE_SENDFILE
ssize_t sent_size = sendfile(fdout, fdin, NULL, fdin_size);
- if (sent_size == fdin_size)
- slow_copy = false;
- else if (sent_size != -1 || errno != ENOSYS)
- xbt_die("Error while copying %s: only %zd bytes copied instead of %" PRIdMAX " (errno: %d -- %s)", target.c_str(),
- sent_size, static_cast<intmax_t>(fdin_size), errno, strerror(errno));
+ xbt_assert(sent_size == fdin_size || (sent_size == -1 && errno == ENOSYS),
+ "Error while copying %s: only %zd bytes copied instead of %" PRIdMAX " (errno: %d -- %s)", target.c_str(),
+ sent_size, static_cast<intmax_t>(fdin_size), errno, strerror(errno));
+#else
+ ssize_t sent_size = -1;
#endif
- if (slow_copy) {
+ if (sent_size != fdin_size) { // sendfile is not available
const int bufsize = 1024 * 1024 * 4;
char buf[bufsize];
while (int got = read(fdin, buf, bufsize)) {
struct stat fdin_stat;
stat(executable.c_str(), &fdin_stat);
off_t fdin_size = fdin_stat.st_size;
- static std::size_t rank = 0;
std::string libnames = simgrid::config::get_value<std::string>("smpi/privatize-libs");
if (not libnames.empty()) {
simix_global->default_function = [executable, fdin_size](std::vector<std::string> args) {
return std::function<void()>([executable, fdin_size, args] {
+ static std::size_t rank = 0;
// Copy the dynamic library:
std::string target_executable =
executable + "_" + std::to_string(getpid()) + "_" + std::to_string(rank) + ".so";
snprintf(loc, PTR_STRLEN, "%p", mem);
meta.size = size;
meta.data = data;
+ meta.allocated_ptr = mem;
+ meta.allocated_size = size;
allocs_metadata[mem] = meta;
XBT_DEBUG("MMAP %zu to %p", size, mem);
return mem;
if (smpi_iprobe_sleep > 0) {
/** Compute the number of flops we will sleep **/
s4u::this_actor::exec_init(/*nsleeps: See comment above */ nsleeps *
- /*(in seconds)*/ smpi_iprobe_sleep * speed * maxrate)
+ /*(seconds * flop/s -> total flops)*/ smpi_iprobe_sleep * speed * maxrate)
->set_name("iprobe")
+ /* Not the entire CPU can be used when iprobing: this matters for
+ * the energy consumption caused by polling with iprobes.
+ * Note also that the number of flops computed above
+ * already contains a maxrate factor and is hence reduced (maxrate < 1).
+ */
+ ->set_bound(maxrate*speed)
->start()
->wait();
}
static inline void lock_reset(xbt_mallocator_t m)
{
- m->lock = 0;
+ atomic_flag_clear(&m->lock);
}
static inline void lock_acquire(xbt_mallocator_t m)
{
if (initialization_done > 1) {
- while (__atomic_test_and_set(&m->lock, __ATOMIC_ACQUIRE))
+ while (atomic_flag_test_and_set(&m->lock))
/* nop */;
}
}
static inline void lock_release(xbt_mallocator_t m)
{
if (initialization_done > 1)
- __atomic_clear(&m->lock, __ATOMIC_RELEASE);
+ atomic_flag_clear(&m->lock);
}
/**
#ifndef XBT_MALLOCATOR_PRIVATE_H
#define XBT_MALLOCATOR_PRIVATE_H
+#include <stdatomic.h>
#include <xbt/function_types.h>
typedef struct s_xbt_mallocator {
pvoid_f_void_t new_f; /* function to call when we are running out of objects */
void_f_pvoid_t free_f; /* function to call when we have got too many objects */
void_f_pvoid_t reset_f; /* function to call when an object is released by the user */
- char lock; /* lock to ensure the mallocator is thread-safe */
+ atomic_flag lock; /* lock to ensure the mallocator is thread-safe */
} s_xbt_mallocator_t;
#endif
--- /dev/null
+/* Copyright (c) 2019. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#define CATCH_CONFIG_RUNNER // we supply our own main()
+
+#include "catch.hpp"
+
+#include "xbt/log.h"
+
+int main(int argc, char* argv[])
+{
+ xbt_log_init(&argc, argv);
+ return Catch::Session().run(argc, argv);
+}
extern const char *xbt_log_priority_names[8];
extern int xbt_log_no_loc;
-#define check_overflow(len) \
- if ((rem_size -= (len)) > 0) { \
- p += (len); \
- } else \
- return 0
+#define check_overflow(len) \
+ do { \
+ rem_size -= (len); \
+ if (rem_size <= 0) \
+ return 0; \
+ p += (len); \
+ } while (0)
static int xbt_log_layout_simple_doit(xbt_log_layout_t, xbt_log_event_t ev, const char* fmt)
{
> [rank 13] -> Ginette
> [rank 14] -> Ginette
> [rank 15] -> Ginette
-> [ 0.475403] (5:4@Jupiter) The quickest allreduce was redbcast on rank 4 and took 0.007485
-> [ 0.475403] (6:5@Jupiter) The quickest allreduce was redbcast on rank 5 and took 0.007515
-> [ 0.475403] (7:6@Jupiter) The quickest allreduce was redbcast on rank 6 and took 0.007515
-> [ 0.475403] (8:7@Jupiter) The quickest allreduce was redbcast on rank 7 and took 0.007546
-> [ 0.475616] (13:12@Ginette) The quickest allreduce was mvapich2_two_level on rank 12 and took 0.007247
-> [ 0.475616] (14:13@Ginette) The quickest allreduce was mvapich2_two_level on rank 13 and took 0.007278
-> [ 0.475616] (15:14@Ginette) The quickest allreduce was mvapich2_two_level on rank 14 and took 0.007278
-> [ 0.475616] (16:15@Ginette) The quickest allreduce was ompi on rank 15 and took 0.007263
-> [ 0.477007] (2:1@Tremblay) The quickest allreduce was redbcast on rank 1 and took 0.006006
-> [ 0.477007] (3:2@Tremblay) The quickest allreduce was redbcast on rank 2 and took 0.006006
-> [ 0.477007] (4:3@Tremblay) The quickest allreduce was redbcast on rank 3 and took 0.006037
-> [ 0.478158] (10:9@Fafard) The quickest allreduce was mvapich2_two_level on rank 9 and took 0.006492
-> [ 0.478158] (11:10@Fafard) The quickest allreduce was mvapich2_two_level on rank 10 and took 0.006492
-> [ 0.478158] (12:11@Fafard) The quickest allreduce was mvapich2_two_level on rank 11 and took 0.006523
-> [ 0.478158] (9:8@Fafard) The quickest allreduce was mvapich2_two_level on rank 8 and took 0.006462
-> [ 0.482143] (1:0@Tremblay) For rank 0, the quickest was redbcast : 0.005991 , but global was mvapich2_two_level : 0.008672 at max
+> [ 0.475378] (5:4@Jupiter) The quickest allreduce was redbcast on rank 4 and took 0.007485
+> [ 0.475378] (6:5@Jupiter) The quickest allreduce was redbcast on rank 5 and took 0.007515
+> [ 0.475378] (7:6@Jupiter) The quickest allreduce was redbcast on rank 6 and took 0.007515
+> [ 0.475378] (8:7@Jupiter) The quickest allreduce was redbcast on rank 7 and took 0.007546
+> [ 0.475591] (13:12@Ginette) The quickest allreduce was mvapich2 on rank 12 and took 0.007247
+> [ 0.475591] (14:13@Ginette) The quickest allreduce was mvapich2 on rank 13 and took 0.007278
+> [ 0.475591] (15:14@Ginette) The quickest allreduce was mvapich2 on rank 14 and took 0.007278
+> [ 0.475591] (16:15@Ginette) The quickest allreduce was ompi on rank 15 and took 0.007263
+> [ 0.476982] (2:1@Tremblay) The quickest allreduce was redbcast on rank 1 and took 0.006006
+> [ 0.476982] (3:2@Tremblay) The quickest allreduce was redbcast on rank 2 and took 0.006006
+> [ 0.476982] (4:3@Tremblay) The quickest allreduce was redbcast on rank 3 and took 0.006037
+> [ 0.478133] (10:9@Fafard) The quickest allreduce was mvapich2 on rank 9 and took 0.006492
+> [ 0.478133] (11:10@Fafard) The quickest allreduce was mvapich2 on rank 10 and took 0.006492
+> [ 0.478133] (12:11@Fafard) The quickest allreduce was mvapich2 on rank 11 and took 0.006523
+> [ 0.478133] (9:8@Fafard) The quickest allreduce was mvapich2 on rank 8 and took 0.006462
+> [ 0.482118] (1:0@Tremblay) For rank 0, the quickest was redbcast : 0.005991 , but global was mvapich2 : 0.008672 at max
> [0] sndbuf=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ]
> [1] sndbuf=[16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 ]
> [2] sndbuf=[32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 ]
# 3179: deprecated conversion of string literal to char* (should be const char*)
# 191: type qualifier is meaningless on cast type
# 597: entity-kind "entity" will not be called for implicit or explicit conversions
- set(warnCFLAGS "${warnCFLAGS} -wd1418 -wd191 -wd3179 -ww597")
+ # 2330: argument of type "type" is incompatible with parameter of type "type" (dropping qualifiers)
+ set(warnCFLAGS "${warnCFLAGS} -wd1418 -wd191 -wd3179 -ww597 -ww2330")
endif()
set(warnCXXFLAGS "${warnCFLAGS} -Wall -Wextra -Wunused -Wmissing-declarations -Wpointer-arith -Wchar-subscripts -Wcomment -Wformat -Wwrite-strings -Wno-unused-function -Wno-unused-parameter -Wno-strict-aliasing")
ENDIF()
# New tests should use the Catch Framework
-set(UNIT_TESTS src/kernel/resource/profile/trace_mgr_test.cpp
+set(UNIT_TESTS src/xbt/unit-tests_main.cpp
+ src/kernel/resource/profile/trace_mgr_test.cpp
src/xbt/config_test.cpp
src/xbt/dict_test.cpp
src/xbt/dynar_test.cpp